├── Doc ├── GettingStarted.md ├── heat.png ├── teapot.png └── teapot180.png ├── LICENSE ├── Lib ├── Common │ ├── Heap.h │ ├── Seq.h │ └── Stack.h ├── Kernel.cpp ├── Kernel.h ├── Params.h ├── QPULib.h ├── Source │ ├── Cond.h │ ├── Float.cpp │ ├── Float.h │ ├── Gen.cpp │ ├── Gen.h │ ├── Int.cpp │ ├── Int.h │ ├── Interpreter.cpp │ ├── Interpreter.h │ ├── Pretty.cpp │ ├── Pretty.h │ ├── Ptr.h │ ├── Stmt.cpp │ ├── Stmt.h │ ├── StmtExtra.h │ ├── Syntax.cpp │ ├── Syntax.h │ ├── Translate.cpp │ └── Translate.h ├── Target │ ├── CFG.cpp │ ├── CFG.h │ ├── Emulator.cpp │ ├── Emulator.h │ ├── Encode.cpp │ ├── Encode.h │ ├── LiveRangeSplit.cpp │ ├── LiveRangeSplit.h │ ├── Liveness.cpp │ ├── Liveness.h │ ├── LoadStore.cpp │ ├── LoadStore.h │ ├── Pretty.cpp │ ├── Pretty.h │ ├── ReachingDefs.cpp │ ├── ReachingDefs.h │ ├── RegAlloc.cpp │ ├── RegAlloc.h │ ├── RemoveLabels.cpp │ ├── RemoveLabels.h │ ├── Satisfy.cpp │ ├── Satisfy.h │ ├── SmallLiteral.cpp │ ├── SmallLiteral.h │ ├── Subst.cpp │ ├── Subst.h │ ├── Syntax.cpp │ └── Syntax.h └── VideoCore │ ├── Invoke.cpp │ ├── Invoke.h │ ├── Mailbox.cpp │ ├── Mailbox.h │ ├── SharedArray.h │ ├── VideoCore.cpp │ └── VideoCore.h ├── README.md └── Tests ├── AutoTest.cpp ├── GCD.cpp ├── HeatMap.cpp ├── HeatMapScalar.cpp ├── Hello.cpp ├── ID.cpp ├── Makefile ├── MultiTri.cpp ├── OET.cpp ├── Print.cpp ├── ReqRecv.cpp ├── Rot3D.cpp ├── Sort.cpp ├── Tri.cpp └── TriFloat.cpp /Doc/GettingStarted.md: -------------------------------------------------------------------------------- 1 | ### Getting started 2 | 3 | On recent Raspbian distributions, QPULib should work out-of-the-box. 4 | It's been tested on the Pi 1 Model B, the Pi 2, but not yet the Pi 5 | 3. 6 | 7 | ##### Building and running the GCD example 8 | 9 | Try the commands below to build and run the `GCD` example. 
10 | 11 | ``` 12 | sudo apt-get install git 13 | git clone https://github.com/mn416/QPULib 14 | cd QPULib/Tests 15 | make QPU=1 GCD 16 | sudo ./GCD 17 | ``` 18 | 19 | The `QPU=1` flag to `make` indicates that the physical QPUs on the 20 | Raspberry Pi should be used to run QPULib kernels. Simply using 21 | `make` without `QPU=1`, or setting `QPU` to any value other than 1, 22 | will lead to **emulation mode** being used. As the name suggests, 23 | this means that QPU code will be emulated in software. This is useful 24 | for debugging, and also allows you to run QPULib programs on a PC if 25 | your Pi is not to hand. If you want to recompile with a different 26 | flag then do a `make clean` first. 27 | 28 | Strictly speaking, any program that works in emulation mode but not on 29 | the Pi's physical QPUs probably exposes a bug in QPULib and should be 30 | reported, although there may be valid explanations for such 31 | differences. 32 | 33 | ##### Other examples 34 | 35 | Here is a quick overview of some of the other examples, which can be 36 | built in the same way as GCD: 37 | 38 | Example | Description | Output 39 | --------- | ----------- | ------ 40 | GCD | [Euclid's algorithm](https://en.wikipedia.org/wiki/Euclidean_algorithm) | The GCDs of some random pairs of integers 41 | Tri | Computes [triangular numbers](https://en.wikipedia.org/wiki/Triangular_number) | The first 16 triangular numbers 42 | MultiTri | Like Tri, but uses 4 QPUs instead of 1 | The first 64 triangular numbers 43 | TriFloat | Like Tri but casts floats to integers and back | The first 16 triangular numbers 44 | OET | [Odd-even transposition sorter](https://en.wikipedia.org/wiki/Odd%E2%80%93even_sort) | 32 integers, sorted 45 | HeatMap | Modelling heat flow across a 2D surface | An image in [pgm](http://netpbm.sourceforge.net/doc/pgm.html) format, and the time taken 46 | Rot3D | 3D rotation of a random object | The time taken 47 | 48 | ##### AutoTest: specification-based testing of the compiler 49 | 
50 | Another program in the `Tests` directory worth mentioning is 51 | `AutoTest`: it generates random QPULib programs, runs them on both the 52 | source language interpreter and the target language emulator, and 53 | checks for equivalence. Currently, it only works in emulation mode. 54 | 55 | ##### CPU/GPU memory split 56 | 57 | Depending on your plans, it may be useful to ensure that plenty of 58 | memory is available to the GPU. This can be done by using 59 | `raspi-config`, selecting `Advanced Options` and then `Memory Split`. 60 | (On a Raspberry Pi 1 Model B, 32M seems to be the minimum that works 61 | for me.) 62 | -------------------------------------------------------------------------------- /Doc/heat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/heat.png -------------------------------------------------------------------------------- /Doc/teapot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/teapot.png -------------------------------------------------------------------------------- /Doc/teapot180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/teapot180.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | QPULib is released under the MIT License below. 
This covers all files 2 | in the QPULib repository, except for: 3 | 4 | * Doc/teapot.png 5 | * Doc/teapot180.png 6 | * Lib/VideoCore/Mailbox.cpp 7 | * Lib/VideoCore/Mailbox.h 8 | 9 | MIT License 10 | 11 | Copyright (c) 2016 Matthew Naylor 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining 14 | a copy of this software and associated documentation files (the 15 | "Software"), to deal in the Software without restriction, including 16 | without limitation the rights to use, copy, modify, merge, publish, 17 | distribute, sublicense, and/or sell copies of the Software, and to 18 | permit persons to whom the Software is furnished to do so, subject to 19 | the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be 22 | included in all copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 27 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 28 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 29 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 30 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// A simple bump allocator: one block obtained from malloc(), from which
// objects are carved sequentially.  Individual allocations are never
// released; clear() recycles the whole block and destroy() returns it
// to the system.
//
// NOTE: no alignment padding is inserted between allocations; each
// object starts at 'base + size'.
// NOTE: the class owns 'base' but defines no copy constructor or copy
// assignment, so copying a Heap would make two objects free the same
// block.  Pass heaps by pointer or reference.
class Heap
{
  public:
    uint8_t *base;                 // Start of the block (NULL when empty)
    unsigned long size, capacity;  // Bytes in use / bytes available
    const char* heapName;          // Name used in error messages

    // Construct an empty heap
    Heap()
      : base(NULL), size(0), capacity(0), heapName("unnamed")
    {
    }

    // Construct an anonymous heap of a given number of bytes.
    // Every member is initialised before create() runs, because
    // create() releases whatever 'base' currently points at.
    Heap(unsigned int heapCapacityInBytes)
      : base(NULL), size(0), capacity(0), heapName("")
    {
      create(heapCapacityInBytes);
    }

    // Construct a named heap of a given number of bytes
    Heap(const char* name, unsigned int heapCapacityInBytes)
      : base(NULL), size(0), capacity(0), heapName(name)
    {
      create(heapCapacityInBytes);
    }

    // Create a heap of a given number of bytes, releasing any block
    // that this heap held before.  Terminates the program if the
    // block cannot be obtained.
    void create(unsigned int heapCapacityInBytes)
    {
      free(base);  // free(NULL) is a no-op
      size = 0;
      capacity = 0;
      base = (uint8_t*) malloc(heapCapacityInBytes);
      // malloc(0) may legitimately return NULL, so only a failed
      // non-empty request is an error
      if (base == NULL && heapCapacityInBytes != 0) {
        printf("QPULib error: unable to allocate heap '%s'.\n", heapName);
        exit(-1);
      }
      capacity = heapCapacityInBytes;
    }

    // Allocate 'n' elements of type T on the heap.
    // Returns a pointer to the first element; terminates the program
    // with an error message if the request does not fit.
    template <class T> T* alloc(unsigned long n)
    {
      // Compare element counts rather than byte counts so that
      // 'sizeof(T) * n' cannot wrap around
      if (size > capacity || n > (capacity - size) / sizeof(T)) {
        printf("QPULib error: heap '%s' is full.\n", heapName);
        exit(-1);
      }
      uint8_t* p = base + size;
      size += sizeof(T) * n;
      return (T*) p;
    }

    // Allocate one element of type T on the heap
    template <class T> T* alloc()
    {
      return alloc<T>(1);
    }

    // Free all the structures allocated on the heap.
    // The block itself is kept for reuse.
    void clear()
    {
      size = 0;
    }

    // Destroy the heap, returning its block to the system
    void destroy()
    {
      free(base);
      base = NULL;
      capacity = 0;
      size = 0;
    }

    // Destructor
    ~Heap()
    {
      destroy();
    }
};
// Default capacity of a sequence constructed without an explicit size.
// Guarded so that an existing definition in the header is respected.
#ifndef INITIAL_MAX_ELEMS
#define INITIAL_MAX_ELEMS 1024
#endif

// A growable array of T.  The first 'numElems' slots of 'elems' hold
// the contents; 'maxElems' is the number of slots currently allocated.
// T must be default-constructible and assignable, and member()/insert()
// additionally require operator==.
template <class T> class Seq
{
  private:
    // Initialisation: allocate 'initialSize' slots, none of them in use
    void init(int initialSize)
    {
      maxElems = initialSize;
      numElems = 0;
      elems = new T[initialSize];
    }

  public:
    int maxElems;
    int numElems;
    T* elems;

    // Constructors
    Seq() { init(INITIAL_MAX_ELEMS); }
    Seq(int initialSize) { init(initialSize); }

    // Copy constructor: deep copy with the same capacity
    Seq(const Seq<T>& seq) {
      init(seq.maxElems);
      numElems = seq.numElems;
      for (int i = 0; i < seq.numElems; i++)
        elems[i] = seq.elems[i];
    }

    // Copy assignment: deep copy.  Without this the compiler-generated
    // operator would share 'elems' between two sequences and both
    // destructors would delete it.
    Seq<T>& operator=(const Seq<T>& seq) {
      if (this != &seq) {
        // Build the copy first so *this is untouched if 'new' throws
        T* copy = new T[seq.maxElems];
        for (int i = 0; i < seq.numElems; i++)
          copy[i] = seq.elems[i];
        delete [] elems;
        elems = copy;
        maxElems = seq.maxElems;
        numElems = seq.numElems;
      }
      return *this;
    }

    // Set capacity of sequence to 'n' slots, preserving the contents.
    // If 'n' is smaller than the current length, the trailing elements
    // are dropped.
    void setCapacity(int n) {
      assert(n >= 0);
      // Never read beyond the old allocation nor write beyond the new
      // one (a caller may have bumped numElems before growing)
      int keep = numElems < maxElems ? numElems : maxElems;
      if (keep > n) keep = n;
      T* newElems = new T[n];
      for (int i = 0; i < keep; i++)
        newElems[i] = elems[i];
      delete [] elems;
      elems = newElems;
      maxElems = n;
      if (numElems > n) numElems = n;
    }

    // Extend size of sequence by one, doubling the capacity when full.
    // The new last slot holds an unspecified value until written.
    void extend()
    {
      if (numElems >= maxElems)
        setCapacity(maxElems > 0 ? maxElems*2 : 1);  // 0*2 would never grow
      numElems++;
    }

    // Append
    void append(T x)
    {
      extend();
      elems[numElems-1] = x;
    }

    // Delete last element
    void deleteLast()
    {
      assert(numElems > 0);
      numElems--;
    }

    // Push
    void push(T x) { append(x); }

    // Pop: remove and return the last element
    T pop() {
      assert(numElems > 0);
      numElems--;
      return elems[numElems];
    }

    // Clear the sequence (capacity is retained)
    void clear()
    {
      numElems = 0;
    }

    // Is given value already in sequence?
    bool member(T x) {
      for (int i = 0; i < numElems; i++)
        if (elems[i] == x) return true;
      return false;
    }

    // Insert element into sequence if not already present.
    // Returns true if the element was added.
    bool insert(T x) {
      bool alreadyPresent = member(x);
      if (!alreadyPresent) append(x);
      return !alreadyPresent;
    }

    // Remove element at index, shifting later elements down by one.
    // Returns the removed element.
    T remove(int index) {
      assert(index >= 0 && index < numElems);
      T x = elems[index];
      for (int j = index; j < numElems-1; j++)
        elems[j] = elems[j+1];
      numElems--;
      return x;
    }

    // Destructor
    ~Seq()
    {
      delete [] elems;
    }
};

// A small sequence is just a sequence with a small initial size
template <class T> class SmallSeq : public Seq<T> {
  public:
    SmallSeq() : Seq<T>(8) {}
};
// Compile a kernel.
//
// Runs the compilation passes over the source statement 'body',
// leaving the result in 'targetCode'.  The passes run in this order:
// translation, load/store pass, control-flow graph construction,
// register allocation, constraint satisfaction and label removal.
//
// When DEBUG is defined, the source program and the target code are
// pretty-printed to stdout.  The target code listing is printed
// before labels are removed.
//
// NOTE(review): 'Seq' appears here without a template argument; the
// element type is not visible in this listing -- confirm against the
// repository.
void compileKernel(Seq* targetCode, Stmt* body)
{
  #ifdef DEBUG
  printf("Source code\n");
  printf("===========\n\n");
  pretty(body);
  printf("\n");
  #endif

  // Translate to target code
  translateStmt(targetCode, body);

  // Load/store pass
  loadStorePass(targetCode);

  // Construct control-flow graph
  CFG cfg;
  buildCFG(targetCode, &cfg);

  // Apply live-range splitter (currently disabled)
  //liveRangeSplit(targetCode, &cfg);

  // Perform register allocation
  regAlloc(&cfg, targetCode);

  // Satisfy target code constraints
  satisfy(targetCode);

  #ifdef DEBUG
  printf("Target code\n");
  printf("===========\n\n");
  // One instruction per entry, prefixed by its index in the sequence
  for (int i = 0; i < targetCode->numElems; i++) {
    printf("%i: ", i);
    pretty(targetCode->elems[i]);
  }
  printf("\n");
  #endif

  // Translate branch-to-labels to relative branches
  removeLabels(targetCode);
}
36 | 37 | // Notice it is OK to compile with both -D EMULATION_MODE *and* 38 | // -D QPU_MODE. This feature is provided for doing equivalance 39 | // testing between the physical QPU and the QPU emulator. However, 40 | // EMULATION_MODE introduces a performance penalty and should be used 41 | // only for testing and debugging purposes. 42 | 43 | // Maximum number of kernel parameters allowed 44 | #define MAX_KERNEL_PARAMS 128 45 | 46 | // ============================================================================ 47 | // Kernel arguments 48 | // ============================================================================ 49 | 50 | // Construct an argument of QPU type 't'. 51 | 52 | template inline t mkArg(); 53 | 54 | template <> inline Int mkArg() { 55 | Int x; 56 | x = getUniformInt(); 57 | return x; 58 | } 59 | 60 | template <> inline Float mkArg() { 61 | Float x; 62 | x = getUniformFloat(); 63 | return x; 64 | } 65 | 66 | template <> inline Ptr mkArg< Ptr >() { 67 | Ptr x; 68 | x = getUniformPtr(); 69 | return x; 70 | } 71 | 72 | template <> inline Ptr mkArg< Ptr >() { 73 | Ptr x; 74 | x = getUniformPtr(); 75 | return x; 76 | } 77 | 78 | // ============================================================================ 79 | // Parameter passing 80 | // ============================================================================ 81 | 82 | template inline void nothing(ts... args) {} 83 | 84 | // Pass argument of ARM type 'u' as parameter of QPU type 't'. 
85 | 86 | template inline bool 87 | passParam(Seq* uniforms, u x); 88 | 89 | // Pass an int 90 | template <> inline bool passParam 91 | (Seq* uniforms, int x) 92 | { 93 | uniforms->append((int32_t) x); 94 | return true; 95 | } 96 | 97 | // Pass a float 98 | template <> inline bool passParam 99 | (Seq* uniforms, float x) 100 | { 101 | int32_t* bits = (int32_t*) &x; 102 | uniforms->append(*bits); 103 | return true; 104 | } 105 | 106 | // Pass a SharedArray* 107 | template <> inline bool passParam< Ptr, SharedArray* > 108 | (Seq* uniforms, SharedArray* p) 109 | { 110 | uniforms->append(p->getAddress()); 111 | return true; 112 | } 113 | 114 | // Pass a SharedArray* 115 | template <> inline bool passParam< Ptr>, SharedArray* > 116 | (Seq* uniforms, SharedArray* p) 117 | { 118 | uniforms->append(p->getAddress()); 119 | return true; 120 | } 121 | 122 | // Pass a SharedArray* 123 | template <> inline bool passParam< Ptr, SharedArray* > 124 | (Seq* uniforms, SharedArray* p) 125 | { 126 | uniforms->append(p->getAddress()); 127 | return true; 128 | } 129 | 130 | // Pass a SharedArray* 131 | template <> inline bool passParam< Ptr>, SharedArray* > 132 | (Seq* uniforms, SharedArray* p) 133 | { 134 | uniforms->append(p->getAddress()); 135 | return true; 136 | } 137 | 138 | // ============================================================================ 139 | // Functions on kernels 140 | // ============================================================================ 141 | 142 | // Compile a kernel 143 | void compileKernel(Seq* targetCode, Stmt* s); 144 | 145 | // ============================================================================ 146 | // Kernels 147 | // ============================================================================ 148 | 149 | // A kernel is parameterised by a list of QPU types 'ts' representing 150 | // the types of the parameters that the kernel takes. 151 | 152 | // The kernel constructor takes a function with parameters of QPU 153 | // types 'ts'. 
It applies the function to constuct an AST. 154 | 155 | template struct Kernel { 156 | // AST representing the source code 157 | Stmt* sourceCode; 158 | 159 | // AST representing the target code 160 | Seq targetCode; 161 | 162 | // Parameters to be passed to kernel 163 | Seq uniforms; 164 | 165 | // The number of variables in the source code 166 | int numVars; 167 | 168 | // Number of QPUs to run on 169 | int numQPUs; 170 | 171 | // Memory region for QPU code and parameters 172 | #ifdef QPU_MODE 173 | SharedArray* qpuCodeMem; 174 | int qpuCodeMemOffset; 175 | #endif 176 | 177 | // Construct kernel out of C++ function 178 | Kernel(void (*f)(ts... params)) { 179 | numQPUs = 1; 180 | 181 | // Initialise AST constructors 182 | #ifndef EMULATION_MODE 183 | astHeap.clear(); 184 | #endif 185 | controlStack.clear(); 186 | stmtStack.clear(); 187 | stmtStack.push(mkSkip()); 188 | resetFreshVarGen(); 189 | resetFreshLabelGen(); 190 | 191 | // Reserved general-purpose variables 192 | Int qpuId, qpuCount, readStride, writeStride; 193 | qpuId = getUniformInt(); 194 | qpuCount = getUniformInt(); 195 | 196 | // Construct the AST 197 | f(mkArg()...); 198 | 199 | // QPU code to cleanly exit 200 | kernelFinish(); 201 | 202 | // Obtain the AST 203 | Stmt* body = stmtStack.top(); 204 | stmtStack.pop(); 205 | 206 | // Save pointer to source program for interpreter 207 | #ifdef EMULATION_MODE 208 | sourceCode = body; 209 | #else 210 | sourceCode = NULL; 211 | #endif 212 | 213 | // Compile 214 | compileKernel(&targetCode, body); 215 | 216 | // Remember the number of variables used 217 | numVars = getFreshVarCount(); 218 | 219 | #ifdef QPU_MODE 220 | enableQPUs(); 221 | 222 | // Allocate code mem 223 | qpuCodeMem = new SharedArray; 224 | 225 | // Encode target instrs into array of 32-bit ints 226 | Seq code; 227 | encode(&targetCode, &code); 228 | 229 | // Allocate memory for QPU code and parameters 230 | int numWords = code.numElems + 12*MAX_KERNEL_PARAMS + 12*2; 231 | 
qpuCodeMem->alloc(numWords); 232 | 233 | // Copy kernel to code memory 234 | int offset = 0; 235 | for (int i = 0; i < code.numElems; i++) { 236 | (*qpuCodeMem)[offset++] = code.elems[i]; 237 | } 238 | qpuCodeMemOffset = offset; 239 | #endif 240 | } 241 | 242 | #ifdef EMULATION_MODE 243 | template void emu(us... args) { 244 | // Pass params, checking arguments types us against parameter types ts 245 | uniforms.clear(); 246 | nothing(passParam(&uniforms, args)...); 247 | 248 | emulate 249 | ( numQPUs // Number of QPUs active 250 | , &targetCode // Instruction sequence 251 | , numVars // Number of vars in source 252 | , &uniforms // Kernel parameters 253 | , NULL // Use stdout 254 | ); 255 | } 256 | #endif 257 | 258 | // Invoke the interpreter 259 | #ifdef EMULATION_MODE 260 | template void interpret(us... args) { 261 | // Pass params, checking arguments types us against parameter types ts 262 | uniforms.clear(); 263 | nothing(passParam(&uniforms, args)...); 264 | 265 | interpreter 266 | ( numQPUs // Number of QPUs active 267 | , sourceCode // Source program 268 | , numVars // Number of vars in source 269 | , &uniforms // Kernel parameters 270 | , NULL // Use stdout 271 | ); 272 | } 273 | #endif 274 | 275 | // Invoke kernel on physical QPU hardware 276 | #ifdef QPU_MODE 277 | template void qpu(us... args) { 278 | // Pass params, checking arguments types us against parameter types ts 279 | uniforms.clear(); 280 | nothing(passParam(&uniforms, args)...); 281 | 282 | // Invoke kernel on QPUs 283 | invoke(numQPUs, *qpuCodeMem, qpuCodeMemOffset, &uniforms); 284 | } 285 | #endif 286 | 287 | // Invoke the kernel 288 | template void call(us... args) { 289 | #ifdef EMULATION_MODE 290 | emu(args...); 291 | #else 292 | #ifdef QPU_MODE 293 | qpu(args...); 294 | #endif 295 | #endif 296 | }; 297 | 298 | // Overload function application operator 299 | template void operator()(us... 
args) { 300 | call(args...); 301 | } 302 | 303 | // Set number of QPUs to use 304 | void setNumQPUs(int n) { 305 | numQPUs = n; 306 | } 307 | 308 | // Deconstructor 309 | ~Kernel() { 310 | #ifdef QPU_MODE 311 | delete qpuCodeMem; 312 | disableQPUs(); 313 | #endif 314 | } 315 | }; 316 | 317 | // Initialiser 318 | 319 | template Kernel compile(void (*f)(ts... params)) 320 | { 321 | Kernel k(f); 322 | return k; 323 | } 324 | 325 | #endif 326 | -------------------------------------------------------------------------------- /Lib/Params.h: -------------------------------------------------------------------------------- 1 | #ifndef _PARAMS_H_ 2 | #define _PARAMS_H_ 3 | 4 | // Memory available for constructing abstract syntax trees 5 | #define AST_HEAP_SIZE 131072 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /Lib/QPULib.h: -------------------------------------------------------------------------------- 1 | #ifndef _QPULIB_H_ 2 | #define _QPULIB_H_ 3 | 4 | #include "Source/Int.h" 5 | #include "Source/Float.h" 6 | #include "Source/Ptr.h" 7 | #include "Source/Cond.h" 8 | #include "Source/Stmt.h" 9 | #include "Kernel.h" 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /Lib/Source/Cond.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOURCE_COND_H_ 2 | #define _SOURCE_COND_H_ 3 | 4 | #include "Source/Syntax.h" 5 | #include "Source/Int.h" 6 | 7 | // ============================================================================ 8 | // Types 9 | // ============================================================================ 10 | 11 | struct Cond 12 | { 13 | // Abstract syntax tree 14 | CExpr* cexpr; 15 | // Constructor 16 | Cond(CExpr* c) { cexpr = c; } 17 | }; 18 | 19 | struct BoolExpr 20 | { 21 | // Abstract syntax tree 22 | BExpr* bexpr; 23 | // Constructor 24 | BoolExpr(BExpr* b) { bexpr = b; } 25 | // Cast to Cond 26 | 
//operator Cond(); 27 | }; 28 | 29 | // ============================================================================ 30 | // Generic 'Int' comparison 31 | // ============================================================================ 32 | 33 | inline BoolExpr mkIntCmp(IntExpr a, CmpOp op, IntExpr b) 34 | { return BoolExpr(mkCmp(a.expr, op, b.expr)); } 35 | 36 | // ============================================================================ 37 | // Specific 'Int' comparisons 38 | // ============================================================================ 39 | 40 | inline BoolExpr operator==(IntExpr a, IntExpr b) 41 | { return mkIntCmp(a, mkCmpOp(EQ, INT32), b); } 42 | 43 | inline BoolExpr operator!=(IntExpr a, IntExpr b) 44 | { return mkIntCmp(a, mkCmpOp(NEQ, INT32), b); } 45 | 46 | inline BoolExpr operator<(IntExpr a, IntExpr b) 47 | { return mkIntCmp(a, mkCmpOp(LT, INT32), b); } 48 | 49 | inline BoolExpr operator<=(IntExpr a, IntExpr b) 50 | { return mkIntCmp(a, mkCmpOp(LE, INT32), b); } 51 | 52 | inline BoolExpr operator>(IntExpr a, IntExpr b) 53 | { return mkIntCmp(a, mkCmpOp(GT, INT32), b); } 54 | 55 | inline BoolExpr operator>=(IntExpr a, IntExpr b) 56 | { return mkIntCmp(a, mkCmpOp(GE, INT32), b); } 57 | 58 | // ============================================================================ 59 | // Generic 'Float' comparison 60 | // ============================================================================ 61 | 62 | inline BoolExpr mkFloatCmp(FloatExpr a, CmpOp op, FloatExpr b) 63 | { return BoolExpr(mkCmp(a.expr, op, b.expr)); } 64 | 65 | // ============================================================================ 66 | // Specific 'Float' comparisons 67 | // ============================================================================ 68 | 69 | inline BoolExpr operator==(FloatExpr a, FloatExpr b) 70 | { return mkFloatCmp(a, mkCmpOp(EQ, FLOAT), b); } 71 | 72 | inline BoolExpr operator!=(FloatExpr a, FloatExpr b) 73 | { return mkFloatCmp(a, mkCmpOp(NEQ, 
FLOAT), b); } 74 | 75 | inline BoolExpr operator<(FloatExpr a, FloatExpr b) 76 | { return mkFloatCmp(a, mkCmpOp(LT, FLOAT), b); } 77 | 78 | inline BoolExpr operator<=(FloatExpr a, FloatExpr b) 79 | { return mkFloatCmp(a, mkCmpOp(LE, FLOAT), b); } 80 | 81 | inline BoolExpr operator>(FloatExpr a, FloatExpr b) 82 | { return mkFloatCmp(a, mkCmpOp(GT, FLOAT), b); } 83 | 84 | inline BoolExpr operator>=(FloatExpr a, FloatExpr b) 85 | { return mkFloatCmp(a, mkCmpOp(GE, FLOAT), b); } 86 | 87 | // ============================================================================ 88 | // Boolean operators 89 | // ============================================================================ 90 | 91 | inline BoolExpr operator!(BoolExpr a) 92 | { return BoolExpr(mkNot(a.bexpr)); } 93 | 94 | inline BoolExpr operator&&(BoolExpr a, BoolExpr b) 95 | { return BoolExpr(mkAnd(a.bexpr, b.bexpr)); } 96 | 97 | inline BoolExpr operator||(BoolExpr a, BoolExpr b) 98 | { return BoolExpr(mkOr(a.bexpr, b.bexpr)); } 99 | 100 | inline Cond any(BoolExpr a) 101 | { return Cond(mkAny(a.bexpr)); } 102 | 103 | inline Cond all(BoolExpr a) 104 | { return Cond(mkAll(a.bexpr)); } 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /Lib/Source/Float.cpp: -------------------------------------------------------------------------------- 1 | #include "Source/Float.h" 2 | #include "Source/Stmt.h" 3 | 4 | // ============================================================================ 5 | // Type 'FloatExpr' 6 | // ============================================================================ 7 | 8 | // Constructors 9 | 10 | FloatExpr::FloatExpr() { this->expr = NULL; } 11 | 12 | FloatExpr::FloatExpr(float x) { this->expr = mkFloatLit(x); } 13 | 14 | // Helper constructor 15 | 16 | inline FloatExpr mkFloatExpr(Expr* e) { FloatExpr x; x.expr = e; return x; } 17 | 18 | // ============================================================================ 19 | // Type 'Float' 20 
| // ============================================================================ 21 | 22 | // Constructors 23 | 24 | Float::Float() { 25 | Var v = freshVar(); 26 | this->expr = mkVar(v); 27 | } 28 | 29 | Float::Float(float x) { 30 | Var v = freshVar(); 31 | this->expr = mkVar(v); 32 | assign(this->expr, mkFloatLit(x)); 33 | } 34 | 35 | Float::Float(FloatExpr e) { 36 | Var v = freshVar(); 37 | this->expr = mkVar(v); 38 | assign(this->expr, e.expr); 39 | } 40 | 41 | // Copy constructors 42 | 43 | Float::Float(Float& x) { 44 | Var v = freshVar(); 45 | this->expr = mkVar(v); 46 | assign(this->expr, x.expr); 47 | } 48 | 49 | Float::Float(const Float& x) { 50 | Var v = freshVar(); 51 | this->expr = mkVar(v); 52 | assign(this->expr, x.expr); 53 | } 54 | 55 | // Cast to an FloatExpr 56 | 57 | Float::operator FloatExpr() { return mkFloatExpr(this->expr); } 58 | 59 | // Assignment 60 | 61 | Float& Float::operator=(Float& rhs) 62 | { assign(this->expr, rhs.expr); return rhs; } 63 | 64 | FloatExpr Float::operator=(FloatExpr rhs) 65 | { assign(this->expr, rhs.expr); return rhs; }; 66 | 67 | // ============================================================================ 68 | // Generic operations 69 | // ============================================================================ 70 | 71 | inline FloatExpr mkFloatApply(FloatExpr a,Op op,FloatExpr b) 72 | { 73 | Expr* e = mkApply(a.expr, op, b.expr); 74 | return mkFloatExpr(e); 75 | } 76 | 77 | // ============================================================================ 78 | // Specific operations 79 | // ============================================================================ 80 | 81 | // Read an Float from the UNIFORM FIFO. 
82 | FloatExpr getUniformFloat() 83 | { 84 | Expr* e = mkExpr(); 85 | e->tag = VAR; 86 | e->var.tag = UNIFORM; 87 | return mkFloatExpr(e); 88 | } 89 | 90 | // Add 91 | FloatExpr operator+(FloatExpr a, FloatExpr b) 92 | { return mkFloatApply(a, mkOp(ADD, FLOAT), b); } 93 | 94 | // Subtract 95 | FloatExpr operator-(FloatExpr a, FloatExpr b) 96 | { return mkFloatApply(a, mkOp(SUB, FLOAT), b); } 97 | 98 | // Multiply 99 | FloatExpr operator*(FloatExpr a, FloatExpr b) 100 | { return mkFloatApply(a, mkOp(MUL, FLOAT), b); } 101 | 102 | // Min 103 | FloatExpr min(FloatExpr a, FloatExpr b) 104 | { return mkFloatApply(a, mkOp(MIN, FLOAT), b); } 105 | 106 | // Max 107 | FloatExpr max(FloatExpr a, FloatExpr b) 108 | { return mkFloatApply(a, mkOp(MAX, FLOAT), b); } 109 | -------------------------------------------------------------------------------- /Lib/Source/Float.h: -------------------------------------------------------------------------------- 1 | // This module defines type 'Float' for a vector of 16 x 32-bit floats. 2 | 3 | #ifndef _SOURCE_FLOAT_H_ 4 | #define _SOURCE_FLOAT_H_ 5 | 6 | #include 7 | #include "Source/Syntax.h" 8 | 9 | // ============================================================================ 10 | // Types 11 | // ============================================================================ 12 | 13 | // An 'FloatExpr' defines an float vector expression which can 14 | // only be used on the RHS of assignment statements. 15 | 16 | struct FloatExpr { 17 | // Abstract syntax tree 18 | Expr* expr; 19 | // Constructors 20 | FloatExpr(); 21 | FloatExpr(float x); 22 | }; 23 | 24 | // An 'Float' defines a float vector variable which can be used in 25 | // both the LHS and RHS of an assignment. 
26 | 27 | struct Float { 28 | // Abstract syntax tree 29 | Expr* expr; 30 | 31 | // Constructors 32 | Float(); 33 | Float(float x); 34 | Float(FloatExpr e); 35 | 36 | // Copy constructors 37 | Float(Float& x); 38 | Float(const Float& x); 39 | 40 | // Cast to a FloatExpr 41 | operator FloatExpr(); 42 | 43 | // Assignment 44 | Float& operator=(Float& rhs); 45 | FloatExpr operator=(FloatExpr rhs); 46 | }; 47 | 48 | // ============================================================================ 49 | // Operations 50 | // ============================================================================ 51 | 52 | FloatExpr getUniformFloat(); 53 | 54 | FloatExpr operator+(FloatExpr a, FloatExpr b); 55 | FloatExpr operator-(FloatExpr a, FloatExpr b); 56 | FloatExpr operator*(FloatExpr a, FloatExpr b); 57 | FloatExpr min(FloatExpr a, FloatExpr b); 58 | FloatExpr max(FloatExpr a, FloatExpr b); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /Lib/Source/Gen.h: -------------------------------------------------------------------------------- 1 | // A random source-program generator 2 | 3 | #ifndef _GEN_H_ 4 | #define _GEN_H_ 5 | 6 | #include "Common/Seq.h" 7 | #include "Source/Syntax.h" 8 | 9 | // Options for the program generator 10 | struct GenOptions { 11 | // Limit on depth of nesting of statements and expressions 12 | int depth; 13 | 14 | // Limit on length of statement sequences 15 | int length; 16 | 17 | // Number of arguments to use when generating kernels 18 | int numIntArgs; 19 | int numFloatArgs; 20 | int numPtrArgs; 21 | int numPtr2Args; 22 | 23 | // Number of variables to use when generating kernels 24 | int numIntVars; 25 | int numFloatVars; 26 | 27 | // Loop bound (max iterations of any loop) 28 | int loopBound; 29 | 30 | // Generate float operations? 31 | bool genFloat; 32 | 33 | // Generate rotate operations? 34 | bool genRotate; 35 | 36 | // Generate pointer-dereferencing operations? 
37 | bool genDeref; 38 | bool genDeref2; 39 | 40 | // We can dereference expressions of the form p+e where p is a 41 | // pointer variable and e is an integer expression that is bitwise 42 | // anded with derefOffsetMask. 43 | int derefOffsetMask; 44 | 45 | // Allow loads and stores to be strided? 46 | bool genStrided; 47 | }; 48 | 49 | // Generate random literals 50 | int genIntLit(); 51 | float genFloatLit(); 52 | 53 | // Generate a random source program 54 | Stmt* progGen(GenOptions* opts, int* numVars); 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /Lib/Source/Int.cpp: -------------------------------------------------------------------------------- 1 | #include "Source/Int.h" 2 | #include "Source/Stmt.h" 3 | #include "Source/Float.h" 4 | 5 | // ============================================================================ 6 | // Type 'IntExpr' 7 | // ============================================================================ 8 | 9 | // Constructors 10 | 11 | IntExpr::IntExpr() { this->expr = NULL; } 12 | 13 | IntExpr::IntExpr(int x) { this->expr = mkIntLit(x); } 14 | 15 | // Helper constructors: wrap a raw Expr* in an IntExpr / FloatExpr 16 | 17 | inline IntExpr mkIntExpr(Expr* e) { IntExpr x; x.expr = e; return x; } 18 | inline FloatExpr mkFloatExpr(Expr* e) { FloatExpr x; x.expr = e; return x; } 19 | 20 | // ============================================================================ 21 | // Type 'Int' 22 | // ============================================================================ 23 | 24 | // Constructors: each binds the Int to a fresh variable; the value-taking forms also emit an assignment to it 25 | 26 | Int::Int() { 27 | Var v = freshVar(); 28 | this->expr = mkVar(v); 29 | } 30 | 31 | Int::Int(int x) { 32 | Var v = freshVar(); 33 | this->expr = mkVar(v); 34 | assign(this->expr, mkIntLit(x)); 35 | } 36 | 37 | Int::Int(IntExpr e) { 38 | Var v = freshVar(); 39 | this->expr = mkVar(v); 40 | assign(this->expr, e.expr); 41 | } 42 | 43 | // Copy constructors 44 | 45 | Int::Int(Int& x) { 46 | Var v = freshVar(); 47 | this->expr = 
mkVar(v); 48 | assign(this->expr, x.expr); 49 | } 50 | 51 | Int::Int(const Int& x) { 52 | Var v = freshVar(); 53 | this->expr = mkVar(v); 54 | assign(this->expr, x.expr); 55 | } 56 | 57 | // Cast to an IntExpr 58 | 59 | Int::operator IntExpr() { return mkIntExpr(this->expr); } 60 | 61 | // Assignment: emits an assignment statement; both overloads return the right-hand side 62 | 63 | Int& Int::operator=(Int& rhs) 64 | { assign(this->expr, rhs.expr); return rhs; } 65 | 66 | IntExpr Int::operator=(IntExpr rhs) 67 | { assign(this->expr, rhs.expr); return rhs; }; 68 | 69 | // ============================================================================ 70 | // Generic operations 71 | // ============================================================================ 72 | 73 | inline IntExpr mkIntApply(IntExpr a,Op op,IntExpr b) 74 | { 75 | Expr* e = mkApply(a.expr, op, b.expr); 76 | return mkIntExpr(e); 77 | } 78 | 79 | // ============================================================================ 80 | // Specific operations 81 | // ============================================================================ 82 | 83 | // Read an Int from the UNIFORM FIFO. 84 | IntExpr getUniformInt() 85 | { 86 | Expr* e = mkExpr(); 87 | e->tag = VAR; 88 | e->var.tag = UNIFORM; 89 | return mkIntExpr(e); 90 | } 91 | 92 | // A vector containing integers 0..15 93 | IntExpr index() 94 | { 95 | Expr* e = mkExpr(); 96 | e->tag = VAR; 97 | e->var.tag = ELEM_NUM; 98 | return mkIntExpr(e); 99 | } 100 | 101 | // A vector containing the QPU id 102 | IntExpr me() 103 | { 104 | // There is a reserved var holding the QPU ID. 105 | Expr* e = mkExpr(); 106 | e->tag = VAR; 107 | e->var.tag = STANDARD; 108 | e->var.id = RSV_QPU_ID; 109 | return mkIntExpr(e); 110 | } 111 | 112 | // A vector containing the QPU count 113 | IntExpr numQPUs() 114 | { 115 | // There is a reserved var holding the QPU count. 
116 | Expr* e = mkExpr(); 117 | e->tag = VAR; 118 | e->var.tag = STANDARD; 119 | e->var.id = RSV_NUM_QPUS; 120 | return mkIntExpr(e); 121 | } 122 | 123 | // Vector rotation 124 | IntExpr rotate(IntExpr a, IntExpr b) 125 | { return mkIntApply(a, mkOp(ROTATE, INT32), b); } 126 | 127 | FloatExpr rotate(FloatExpr a, IntExpr b) 128 | { 129 | Expr* e = mkApply(a.expr, mkOp(ROTATE, FLOAT), b.expr); 130 | return mkFloatExpr(e); 131 | } 132 | 133 | // Add 134 | IntExpr operator+(IntExpr a, IntExpr b) 135 | { return mkIntApply(a, mkOp(ADD, INT32), b); } 136 | 137 | // Increment (postfix form only; emits an assignment of this + 1 and returns nothing) 138 | void Int::operator++(int) 139 | { *this = *this + 1; } 140 | 141 | // Subtract 142 | IntExpr operator-(IntExpr a, IntExpr b) 143 | { return mkIntApply(a, mkOp(SUB, INT32), b); } 144 | 145 | // Multiply 146 | IntExpr operator*(IntExpr a, IntExpr b) 147 | { return mkIntApply(a, mkOp(MUL, INT32), b); } 148 | 149 | // Min 150 | IntExpr min(IntExpr a, IntExpr b) 151 | { return mkIntApply(a, mkOp(MIN, INT32), b); } 152 | 153 | // Max 154 | IntExpr max(IntExpr a, IntExpr b) 155 | { return mkIntApply(a, mkOp(MAX, INT32), b); } 156 | 157 | // Shift left 158 | IntExpr operator<<(IntExpr a, IntExpr b) 159 | { return mkIntApply(a, mkOp(SHL, INT32), b); } 160 | 161 | // Shift Right 162 | IntExpr operator>>(IntExpr a, IntExpr b) 163 | { return mkIntApply(a, mkOp(SHR, INT32), b); } 164 | 165 | // Bitwise AND 166 | IntExpr operator&(IntExpr a, IntExpr b) 167 | { return mkIntApply(a, mkOp(BAND, INT32), b); } 168 | 169 | // Bitwise OR 170 | IntExpr operator|(IntExpr a, IntExpr b) 171 | { return mkIntApply(a, mkOp(BOR, INT32), b); } 172 | 173 | // Bitwise XOR 174 | IntExpr operator^(IntExpr a, IntExpr b) 175 | { return mkIntApply(a, mkOp(BXOR, INT32), b); } 176 | 177 | // Bitwise NOT (unary: the operand is passed as both arguments because mkIntApply builds a two-operand node) 178 | IntExpr operator~(IntExpr a) 179 | { return mkIntApply(a, mkOp(BNOT, INT32), a); } 180 | 181 | // Unsigned shift-right 182 | IntExpr shr(IntExpr a, IntExpr b) 183 | { return mkIntApply(a, mkOp(USHR, INT32), b); } 184 | 185 | // 
Bitwise rotate-right 186 | IntExpr ror(IntExpr a, IntExpr b) 187 | { return mkIntApply(a, mkOp(ROR, INT32), b); } 188 | 189 | // Conversion to Int (the literal 0 looks like a placeholder right operand - TODO confirm it is ignored) 190 | IntExpr toInt(FloatExpr a) 191 | { 192 | Expr* e = mkApply(a.expr, mkOp(FtoI, INT32), mkIntLit(0)); 193 | return mkIntExpr(e); 194 | } 195 | 196 | // Conversion to Float (same placeholder right operand as toInt) 197 | FloatExpr toFloat(IntExpr a) 198 | { 199 | Expr* e = mkApply(a.expr, mkOp(ItoF, FLOAT), mkIntLit(0)); 200 | return mkFloatExpr(e); 201 | } 202 | -------------------------------------------------------------------------------- /Lib/Source/Int.h: -------------------------------------------------------------------------------- 1 | // This module defines type 'Int' for a vector of 16 x 32-bit integers. 2 | 3 | #ifndef _SOURCE_INT_H_ 4 | #define _SOURCE_INT_H_ 5 | 6 | #include 7 | #include "Source/Syntax.h" 8 | #include "Source/Float.h" 9 | 10 | // ============================================================================ 11 | // Types 12 | // ============================================================================ 13 | 14 | // An 'IntExpr' defines an integer vector expression which can 15 | // only be used on the RHS of assignment statements. 16 | 17 | struct IntExpr { 18 | // Abstract syntax tree 19 | Expr* expr; 20 | // Constructors 21 | IntExpr(); 22 | IntExpr(int x); 23 | }; 24 | 25 | // An 'Int' defines an integer vector variable which can be used in 26 | // both the LHS and RHS of an assignment. 
27 | 28 | struct Int { 29 | // Abstract syntax tree 30 | Expr* expr; 31 | 32 | // Constructors 33 | Int(); 34 | Int(int x); 35 | Int(IntExpr e); 36 | 37 | // Copy constructors 38 | Int(Int& x); 39 | Int(const Int& x); 40 | 41 | // Cast to an IntExpr 42 | operator IntExpr(); 43 | 44 | // Assignment 45 | Int& operator=(Int& rhs); 46 | IntExpr operator=(IntExpr rhs); 47 | 48 | // Increment 49 | void operator++(int); 50 | }; 51 | 52 | // ============================================================================ 53 | // Operations 54 | // ============================================================================ 55 | 56 | IntExpr getUniformInt(); 57 | IntExpr index(); 58 | IntExpr me(); 59 | IntExpr numQPUs(); 60 | 61 | IntExpr rotate(IntExpr a, IntExpr b); 62 | FloatExpr rotate(FloatExpr a, IntExpr b); 63 | 64 | IntExpr operator+(IntExpr a, IntExpr b); 65 | IntExpr operator-(IntExpr a, IntExpr b); 66 | IntExpr operator*(IntExpr a, IntExpr b); 67 | IntExpr min(IntExpr a, IntExpr b); 68 | IntExpr max(IntExpr a, IntExpr b); 69 | IntExpr operator<<(IntExpr a, IntExpr b); 70 | IntExpr operator>>(IntExpr a, IntExpr b); 71 | IntExpr operator&(IntExpr a, IntExpr b); 72 | IntExpr operator|(IntExpr a, IntExpr b); 73 | IntExpr operator^(IntExpr a, IntExpr b); 74 | IntExpr operator~(IntExpr a); 75 | IntExpr shr(IntExpr a, IntExpr b); 76 | IntExpr ror(IntExpr a, IntExpr b); 77 | IntExpr toInt(FloatExpr a); 78 | FloatExpr toFloat(IntExpr a); 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /Lib/Source/Interpreter.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPRETER_H_ 2 | #define _INTERPRETER_H_ 3 | 4 | #include 5 | #include "Common/Seq.h" 6 | #include "Source/Syntax.h" 7 | 8 | // The interpreter works in a similar way to the emulator. The 9 | // difference is that the former operates on source code and the 10 | // latter on target code. 
We reuse a number of concepts of the 11 | // emulator in the interpreter. 12 | 13 | #include "Target/Emulator.h" 14 | 15 | // State of a single core. 16 | struct CoreState { 17 | int id; // Core id 18 | int numCores; // Core count 19 | Seq* uniforms; // Arguments to kernel 20 | int nextUniform; // Pointer to next uniform to read 21 | int readStride; // Read stride 22 | int writeStride; // Write stride 23 | Vec* env; // Environment mapping vars to values 24 | int sizeEnv; // Size of the environment 25 | Seq* output; // Output from print statements 26 | Seq* stack; // Control stack 27 | Seq* loadBuffer; // Load buffer 28 | }; 29 | 30 | // State of the Interpreter. 31 | struct InterpreterState { 32 | CoreState core[MAX_QPUS]; // State of each core 33 | Word vpm[VPM_SIZE]; // Shared VPM memory 34 | int sema[16]; // Semaphores 35 | }; 36 | 37 | // Interpreter 38 | void interpreter 39 | ( int numCores // Number of cores active 40 | , Stmt* s // Source code 41 | , int numVars // Max var id used in source 42 | , Seq* uniforms // Kernel parameters 43 | , Seq* output // Output from print statements 44 | // (if NULL, stdout is used) 45 | ); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /Lib/Source/Pretty.cpp: -------------------------------------------------------------------------------- 1 | #include "Source/Pretty.h" 2 | 3 | // ============================================================================ 4 | // Operators 5 | // ============================================================================ 6 | 7 | const char* opToString(Op op) 8 | { 9 | switch (op.op) { 10 | case ADD: return "+"; 11 | case SUB: return "-"; 12 | case MUL: return "*"; 13 | case MIN: return " min "; 14 | case MAX: return " max "; 15 | case ROTATE: return " rotate "; 16 | case SHL: return " << "; 17 | case SHR: return " >> "; 18 | case USHR: return " _>> "; 19 | case ROR: return " ror "; 20 | case BOR: return " | "; 21 | case BAND: return " & 
"; 22 | case BXOR: return " ^ "; 23 | case BNOT: return "~"; 24 | case ItoF: return "(Float) "; 25 | case FtoI: return "(Int) "; 26 | } 27 | 28 | // Not reachable for a valid op; the return keeps the function well-defined when assert is compiled out 29 | assert(false); return "?"; 30 | } 31 | 32 | const char* cmpOpToString(CmpOp op) 33 | { 34 | switch (op.op) { 35 | case EQ : return "=="; 36 | case NEQ: return "!="; 37 | case LT : return "<"; 38 | case LE : return "<="; 39 | case GT : return ">"; 40 | case GE : return ">="; 41 | } 42 | 43 | // Not reachable for a valid op; the return keeps the function well-defined when assert is compiled out 44 | assert(false); return "?"; 45 | } 46 | 47 | // ============================================================================ 48 | // Expressions 49 | // ============================================================================ 50 | 51 | void pretty(Expr* e) 52 | { 53 | if (e == NULL) return; 54 | 55 | switch (e->tag) { 56 | // Integer literals 57 | case INT_LIT: 58 | printf("%i", e->intLit); 59 | break; 60 | 61 | // Float literals 62 | case FLOAT_LIT: 63 | printf("%f", e->floatLit); 64 | break; 65 | 66 | // Variables 67 | case VAR: 68 | if (e->var.tag == STANDARD) 69 | printf("v%i", e->var.id); 70 | else if (e->var.tag == UNIFORM) 71 | printf("UNIFORM"); 72 | else if (e->var.tag == QPU_NUM) 73 | printf("QPU_NUM"); 74 | else if (e->var.tag == ELEM_NUM) 75 | printf("ELEM_NUM"); 76 | else if (e->var.tag == TMU0_ADDR) 77 | printf("TMU0_ADDR"); 78 | break; 79 | 80 | // Applications (unary operators print only the left operand) 81 | case APPLY: 82 | if (isUnary(e->apply.op)) { 83 | printf("("); 84 | printf("%s", opToString(e->apply.op)); 85 | pretty(e->apply.lhs); 86 | printf(")"); 87 | } 88 | else { 89 | printf("("); 90 | pretty(e->apply.lhs); 91 | printf("%s", opToString(e->apply.op)); 92 | pretty(e->apply.rhs); 93 | printf(")"); 94 | } 95 | break; 96 | 97 | // Dereference 98 | case DEREF: 99 | printf("*"); 100 | pretty(e->deref.ptr); 101 | break; 102 | 103 | } 104 | } 105 | 106 | // ============================================================================ 107 | // Boolean expressions 108 | // 
============================================================================ 109 | 110 | void pretty(BExpr* b) 111 | { 112 | if (b == NULL) return; 113 | 114 | switch (b->tag) { 115 | // Negation 116 | case NOT: 117 | printf("!"); 118 | pretty(b->neg); 119 | break; 120 | 121 | // Conjunction 122 | case AND: 123 | printf("("); 124 | pretty(b->conj.lhs); 125 | printf(" && "); 126 | pretty(b->conj.rhs); 127 | printf(")"); 128 | break; 129 | 130 | // Disjunction 131 | case OR: 132 | printf("("); 133 | pretty(b->disj.lhs); 134 | printf(" || "); 135 | pretty(b->disj.rhs); 136 | printf(")"); 137 | break; 138 | 139 | // Comparison 140 | case CMP: 141 | pretty(b->cmp.lhs); 142 | printf("%s", cmpOpToString(b->cmp.op)); 143 | pretty(b->cmp.rhs); 144 | break; 145 | } 146 | } 147 | 148 | // ============================================================================ 149 | // Conditional expressions 150 | // ============================================================================ 151 | 152 | void pretty(CExpr* c) 153 | { 154 | if (c == NULL) return; 155 | 156 | switch (c->tag) { 157 | // Reduce using 'any' 158 | case ANY: printf("any("); break; 159 | 160 | // Reduce using 'all' 161 | case ALL: printf("all("); break; 162 | } 163 | 164 | pretty(c->bexpr); 165 | printf(")"); 166 | } 167 | 168 | // ============================================================================ 169 | // Statements 170 | // ============================================================================ 171 | // Print 'indent' spaces 172 | void indentBy(int indent) { 173 | for (int i = 0; i < indent; i++) printf(" "); 174 | } 175 | // Print statement s, with nested bodies indented two columns further than 'indent' 176 | void pretty(int indent, Stmt* s) 177 | { 178 | if (s == NULL) return; 179 | 180 | switch (s->tag) { 181 | // Skip 182 | case SKIP: break; 183 | 184 | // Assignment 185 | case ASSIGN: 186 | indentBy(indent); 187 | pretty(s->assign.lhs); 188 | printf(" = "); 189 | pretty(s->assign.rhs); 190 | printf(";\n"); 191 | break; 192 | 193 | // Sequential composition 194 | case SEQ: 195 | pretty(indent, 
s->seq.s0); 196 | pretty(indent, s->seq.s1); 197 | break; 198 | 199 | // Where statement 200 | case WHERE: 201 | indentBy(indent); 202 | printf("Where ("); 203 | pretty(s->where.cond); 204 | printf(")\n"); 205 | pretty(indent+2, s->where.thenStmt); 206 | if (s->where.elseStmt != NULL) { 207 | indentBy(indent); 208 | printf("Else\n"); 209 | pretty(indent+2, s->where.elseStmt); 210 | } 211 | indentBy(indent); 212 | printf("End\n"); 213 | break; 214 | 215 | // If statement (the else test reads ifElse, the member used for IF nodes, not where) 216 | case IF: 217 | indentBy(indent); 218 | printf("If ("); 219 | pretty(s->ifElse.cond); 220 | printf(")\n"); 221 | pretty(indent+2, s->ifElse.thenStmt); 222 | if (s->ifElse.elseStmt != NULL) { 223 | indentBy(indent); 224 | printf("Else\n"); 225 | pretty(indent+2, s->ifElse.elseStmt); 226 | } 227 | indentBy(indent); 228 | printf("End\n"); 229 | break; 230 | 231 | // While statement 232 | case WHILE: 233 | indentBy(indent); 234 | printf("While ("); 235 | pretty(s->loop.cond); 236 | printf(")\n"); 237 | pretty(indent+2, s->loop.body); 238 | indentBy(indent); 239 | printf("End\n"); 240 | break; 241 | 242 | // Print statement 243 | case PRINT: 244 | indentBy(indent); 245 | printf("Print ("); 246 | if (s->print.tag == PRINT_STR) { 247 | // Ideally would print escaped string here 248 | printf("\"%s\"", s->print.str); 249 | } 250 | else 251 | pretty(s->print.expr); 252 | printf(")\n"); 253 | break; 254 | 255 | // Set read stride 256 | case SET_READ_STRIDE: 257 | indentBy(indent); 258 | printf("setReadStride("); 259 | pretty(s->stride); 260 | printf(")\n"); 261 | break; 262 | 263 | // Set write stride 264 | case SET_WRITE_STRIDE: 265 | indentBy(indent); 266 | printf("setWriteStride("); 267 | pretty(s->stride); 268 | printf(")\n"); 269 | break; 270 | 271 | // Load receive 272 | case LOAD_RECEIVE: 273 | indentBy(indent); 274 | printf("receive("); 275 | pretty(s->loadDest); 276 | printf(")\n"); 277 | break; 278 | 279 | // Store request 280 | case STORE_REQUEST: 281 | indentBy(indent); 282 | printf("store("); 
283 | pretty(s->storeReq.data); 284 | printf(", "); 285 | pretty(s->storeReq.addr); 286 | printf(")\n"); 287 | break; 288 | 289 | // Flush outstanding stores 290 | case FLUSH: 291 | indentBy(indent); 292 | printf("flush()\n"); 293 | break; 294 | 295 | // Increment semaphore 296 | case SEMA_INC: 297 | indentBy(indent); 298 | printf("semaInc(%i)\n", s->semaId); 299 | break; 300 | 301 | // Decrement semaphore 302 | case SEMA_DEC: 303 | indentBy(indent); 304 | printf("semaDec(%i)\n", s->semaId); 305 | break; 306 | 307 | // Host IRQ 308 | case SEND_IRQ_TO_HOST: 309 | indentBy(indent); 310 | printf("hostIRQ()\n"); 311 | break; 312 | 313 | // Not reachable 314 | default: 315 | assert(false); 316 | } 317 | } 318 | 319 | void pretty(Stmt* s) 320 | { 321 | pretty(0, s); 322 | } 323 | -------------------------------------------------------------------------------- /Lib/Source/Pretty.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOURCE_PRETTY_H_ 2 | #define _SOURCE_PRETTY_H_ 3 | 4 | #include "Source/Syntax.h" 5 | 6 | // Pretty printer for the QPULib source language 7 | void pretty(Expr* e); 8 | void pretty(BExpr* b); 9 | void pretty(CExpr* c); 10 | void pretty(Stmt* s); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /Lib/Source/Ptr.h: -------------------------------------------------------------------------------- 1 | // This module defines the type 'Ptr<T>', denoting a pointer to a 2 | // value of type 'T'. 3 | 4 | #ifndef _SOURCE_PTR_H_ 5 | #define _SOURCE_PTR_H_ 6 | 7 | #include 8 | #include "Source/Syntax.h" 9 | 10 | // ============================================================================ 11 | // Types 12 | // ============================================================================ 13 | 14 | // A 'PtrExpr' defines a pointer expression which can only be used on the 15 | // RHS of assignment statements. 
16 | template struct PtrExpr { 17 | // Abstract syntax tree 18 | Expr* expr; 19 | 20 | PtrExpr() { this->expr = NULL; } 21 | 22 | // Dereference 23 | T& operator*() { 24 | // This operation must return a reference to T, so we allocate the 25 | // AST node on the heap and return a reference to it. 26 | T* p = astHeap.alloc(1); 27 | p->expr = mkDeref(expr); 28 | return *p; 29 | } 30 | 31 | // Array index: the index is shifted left by 2 (scaled by 4) before being added to the pointer 32 | T& operator[](IntExpr index) { 33 | T* p = astHeap.alloc(1); 34 | p->expr = mkDeref(mkApply(expr, mkOp(ADD, INT32), 35 | mkApply(index.expr, mkOp(SHL, INT32), mkIntLit(2)))); 36 | return *p; 37 | } 38 | }; 39 | 40 | // A 'Ptr' defines a pointer variable which can be used in both the LHS and 41 | // RHS of an assignment. 42 | 43 | template struct Ptr { 44 | // Abstract syntax tree 45 | Expr* expr; 46 | 47 | // Constructors 48 | Ptr() { 49 | Var v = freshVar(); 50 | this->expr = mkVar(v); 51 | } 52 | 53 | Ptr(PtrExpr rhs) { 54 | Var v = freshVar(); 55 | this->expr = mkVar(v); 56 | assign(this->expr, rhs.expr); 57 | } 58 | 59 | // Copy constructors 60 | Ptr(Ptr& x) { 61 | Var v = freshVar(); 62 | this->expr = mkVar(v); 63 | assign(this->expr, x.expr); 64 | } 65 | Ptr(const Ptr& x) { 66 | Var v = freshVar(); 67 | this->expr = mkVar(v); 68 | assign(this->expr, x.expr); 69 | } 70 | 71 | // Assignment: emits an assignment statement; both overloads return the right-hand side 72 | Ptr& operator=(Ptr& rhs) { 73 | assign(this->expr, rhs.expr); 74 | return rhs; 75 | } 76 | 77 | PtrExpr operator=(PtrExpr rhs) { 78 | assign(this->expr, rhs.expr); 79 | return rhs; 80 | } 81 | 82 | // Dereference 83 | T& operator*() { 84 | // This operation must return a reference to T, so we allocate the 85 | // AST node on the heap and return a reference to it. 
86 | T* p = astHeap.alloc(1); 87 | p->expr = mkDeref(expr); 88 | return *p; 89 | } 90 | 91 | // Array index: the index is shifted left by 2 (scaled by 4) before being added to the pointer 92 | T& operator[](IntExpr index) { 93 | T* p = astHeap.alloc(1); 94 | p->expr = mkDeref(mkApply(expr, mkOp(ADD, INT32), 95 | mkApply(index.expr, mkOp(SHL, INT32), mkIntLit(2)))); 96 | return *p; 97 | } 98 | }; 99 | 100 | // ============================================================================ 101 | // Specific operations 102 | // ============================================================================ 103 | 104 | template inline PtrExpr getUniformPtr() { 105 | Expr* e = mkExpr(); 106 | e->tag = VAR; 107 | e->var.tag = UNIFORM; 108 | PtrExpr x; x.expr = e; return x; 109 | } 110 | // Pointer + offset: the offset is scaled by 4 (4*b for an int, b<<2 for an IntExpr) 111 | template inline PtrExpr operator+(PtrExpr a, int b) { 112 | Expr* e = mkApply(a.expr, mkOp(ADD, INT32), mkIntLit(4*b)); 113 | PtrExpr x; x.expr = e; return x; 114 | } 115 | 116 | template inline PtrExpr operator+(Ptr &a, int b) { 117 | Expr* e = mkApply(a.expr, mkOp(ADD, INT32), mkIntLit(4*b)); 118 | PtrExpr x; x.expr = e; return x; 119 | } 120 | 121 | template inline PtrExpr operator+(PtrExpr a, IntExpr b) { 122 | Expr* e = mkApply(a.expr, mkOp(ADD, INT32), (b<<2).expr); 123 | PtrExpr x; x.expr = e; return x; 124 | } 125 | 126 | template inline PtrExpr operator+(Ptr &a, IntExpr b) { 127 | Expr* e = mkApply(a.expr, mkOp(ADD, INT32), (b<<2).expr); 128 | PtrExpr x; x.expr = e; return x; 129 | } 130 | 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /Lib/Source/Stmt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Common/Stack.h" 3 | #include "Source/Stmt.h" 4 | #include "Source/Int.h" 5 | 6 | // Interface to the embedded language. 
7 | 8 | //============================================================================= 9 | // Assignment token: appends 'lhs = rhs' to the statement sequence on top of stmtStack 10 | //============================================================================= 11 | 12 | void assign(Expr* lhs, Expr* rhs) { 13 | Stmt* s = mkAssign(lhs, rhs); 14 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 15 | } 16 | 17 | //============================================================================= 18 | // 'If' token: pushes an open If on controlStack and a fresh (skip) sequence on stmtStack 19 | //============================================================================= 20 | 21 | void If_(Cond c) 22 | { 23 | Stmt* s = mkIf(c.cexpr, NULL, NULL); 24 | controlStack.push(s); 25 | stmtStack.push(mkSkip()); 26 | } 27 | 28 | void If_(BoolExpr b) 29 | { 30 | If_(any(b)); 31 | } 32 | 33 | //============================================================================= 34 | // 'Else' token: stores the statements collected so far as the 'then' branch of the innermost open If/Where and starts a fresh sequence 35 | //============================================================================= 36 | 37 | void Else_() 38 | { 39 | int ok = 0; 40 | if (controlStack.size > 0) { 41 | Stmt* s = controlStack.top(); 42 | if (s->tag == IF && s->ifElse.thenStmt == NULL) { 43 | s->ifElse.thenStmt = stmtStack.top(); 44 | stmtStack.replace(mkSkip()); 45 | ok = 1; 46 | } 47 | if (s->tag == WHERE && s->where.thenStmt == NULL) { 48 | s->where.thenStmt = stmtStack.top(); 49 | stmtStack.replace(mkSkip()); 50 | ok = 1; 51 | } 52 | } 53 | 54 | if (!ok) { 55 | printf("Syntax error: 'Else' without preceeding 'If' or 'Where'\n"); 56 | exit(-1); 57 | } 58 | } 59 | 60 | //============================================================================= 61 | // 'End' token: stores the collected statements into the innermost open construct, pops both stacks and appends the construct to the enclosing sequence 62 | //============================================================================= 63 | 64 | void End_() 65 | { 66 | int ok = 0; 67 | if (controlStack.size > 0) { 68 | Stmt* s = controlStack.top(); 69 | if (s->tag == IF && s->ifElse.thenStmt == NULL) { 70 | s->ifElse.thenStmt = stmtStack.top(); 71 | ok = 1; 72 | } 73 | else if (s->tag == IF && s->ifElse.elseStmt == NULL) { 74 | s->ifElse.elseStmt = 
stmtStack.top(); 75 | ok = 1; 76 | } 77 | if (s->tag == WHERE && s->where.thenStmt == NULL) { 78 | s->where.thenStmt = stmtStack.top(); 79 | ok = 1; 80 | } 81 | else if (s->tag == WHERE && s->where.elseStmt == NULL) { 82 | s->where.elseStmt = stmtStack.top(); 83 | ok = 1; 84 | } 85 | if (s->tag == WHILE && s->loop.body == NULL) { 86 | s->loop.body = stmtStack.top(); 87 | ok = 1; 88 | } 89 | if (s->tag == FOR && s->forLoop.body == NULL) { 90 | // Convert 'for' loop to 'while' loop: the increment is appended to the body 91 | CExpr* whileCond = s->forLoop.cond; 92 | Stmt* whileBody = mkSeq(stmtStack.top(), s->forLoop.inc); 93 | s->tag = WHILE; 94 | s->loop.body = whileBody; 95 | s->loop.cond = whileCond; 96 | ok = 1; 97 | } 98 | 99 | if (ok) { 100 | stmtStack.pop(); 101 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 102 | controlStack.pop(); 103 | } 104 | } 105 | 106 | if (!ok) { 107 | printf("Syntax error: unexpected 'End'\n"); 108 | exit(-1); 109 | } 110 | } 111 | 112 | //============================================================================= 113 | // 'While' token 114 | //============================================================================= 115 | 116 | void While_(Cond c) 117 | { 118 | Stmt* s = mkWhile(c.cexpr, NULL); 119 | controlStack.push(s); 120 | stmtStack.push(mkSkip()); 121 | } 122 | 123 | void While_(BoolExpr b) 124 | { 125 | While_(any(b)); 126 | } 127 | 128 | //============================================================================= 129 | // 'Where' token 130 | //============================================================================= 131 | 132 | void Where__(BExpr* b) 133 | { 134 | Stmt* s = mkWhere(b, NULL, NULL); 135 | controlStack.push(s); 136 | stmtStack.push(mkSkip()); 137 | } 138 | 139 | //============================================================================= 140 | // 'For' token: For_ opens the loop; ForBody_ then captures the increment statements emitted in between 141 | //============================================================================= 142 | 143 | void For_(Cond c) 144 | { 145 | Stmt* s = mkFor(c.cexpr, NULL, NULL); 146 | 
controlStack.push(s); 147 | stmtStack.push(mkSkip()); 148 | } 149 | 150 | void For_(BoolExpr b) 151 | { 152 | For_(any(b)); 153 | } 154 | // Saves the statements collected since For_ as the loop increment and starts a fresh sequence for the body; does not check that the top of controlStack is a FOR 155 | void ForBody_() 156 | { 157 | Stmt* s = controlStack.top(); 158 | s->forLoop.inc = stmtStack.top(); 159 | stmtStack.pop(); 160 | stmtStack.push(mkSkip()); 161 | } 162 | 163 | //============================================================================= 164 | // 'Print' token 165 | //============================================================================= 166 | 167 | void Print(const char* str) 168 | { 169 | Stmt* s = mkStmt(); 170 | s->tag = PRINT; 171 | s->print.tag = PRINT_STR; 172 | s->print.str = str; 173 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 174 | } 175 | 176 | void Print(IntExpr x) 177 | { 178 | Stmt* s = mkStmt(); 179 | s->tag = PRINT; 180 | s->print.tag = PRINT_INT; 181 | s->print.expr = x.expr; 182 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 183 | } 184 | 185 | //============================================================================= 186 | // Set stride 187 | //============================================================================= 188 | 189 | void setReadStride(IntExpr stride) 190 | { 191 | Stmt* s = mkStmt(); 192 | s->tag = SET_READ_STRIDE; 193 | s->stride = stride.expr; 194 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 195 | } 196 | 197 | void setWriteStride(IntExpr stride) 198 | { 199 | Stmt* s = mkStmt(); 200 | s->tag = SET_WRITE_STRIDE; 201 | s->stride = stride.expr; 202 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 203 | } 204 | 205 | // ============================================================================ 206 | // QPU code for clean exit 207 | // ============================================================================ 208 | 209 | void kernelFinish() 210 | { 211 | // Ensure outstanding stores have completed 212 | flush(); 213 | 214 | // QPU 0 waits until all other QPUs have finished 215 | // before sending a host IRQ. 
216 | If (me() == 0) 217 | Int n = numQPUs()-1; 218 | For (Int i = 0, i < n, i++) 219 | semaDec(15); 220 | End 221 | hostIRQ(); 222 | Else 223 | semaInc(15); 224 | End 225 | } 226 | -------------------------------------------------------------------------------- /Lib/Source/Stmt.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOURCE_STMT_H_ 2 | #define _SOURCE_STMT_H_ 3 | 4 | #include "Source/Cond.h" 5 | #include "Source/Syntax.h" 6 | #include "Source/Ptr.h" 7 | #include "Source/StmtExtra.h" 8 | 9 | //============================================================================= 10 | // Statement macros: each expands to a token call plus a brace, so If/Else/While/Where/For must be closed by End 11 | //============================================================================= 12 | 13 | #define If(c) If_(c); { 14 | #define Else } Else_(); { 15 | #define End } End_(); 16 | #define While(c) While_(c); { 17 | #define Where(b) Where_(b); { 18 | #define For(init, cond, inc) \ 19 | { init; \ 20 | For_(cond); \ 21 | inc; \ 22 | ForBody_(); 23 | 24 | //============================================================================= 25 | // Statement tokens 26 | //============================================================================= 27 | 28 | void assign(Expr* lhs, Expr* rhs); 29 | void If_(Cond c); 30 | void If_(BoolExpr c); 31 | void Else_(); 32 | void End_(); 33 | void While_(Cond c); 34 | void While_(BoolExpr b); 35 | void Where__(BExpr* b); 36 | inline void Where_(BoolExpr b) { Where__(b.bexpr); } 37 | void For_(Cond c); 38 | void For_(BoolExpr b); 39 | void ForBody_(); 40 | void Print(const char *); 41 | void Print(IntExpr x); 42 | void setReadStride(IntExpr n); 43 | void setWriteStride(IntExpr n); 44 | void kernelFinish(); 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /Lib/Source/StmtExtra.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOURCE_STMTEXTRA_H_ 2 | #define _SOURCE_STMTEXTRA_H_ 3 | 4 | 
//============================================================================= 5 | // Host IRQ 6 | //============================================================================= 7 | 8 | inline void hostIRQ() 9 | { 10 | Stmt* s = mkStmt(); 11 | s->tag = SEND_IRQ_TO_HOST; 12 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 13 | } 14 | 15 | //============================================================================= 16 | // Semaphore access 17 | //============================================================================= 18 | 19 | inline void semaInc(int semaId) 20 | { 21 | Stmt* s = mkStmt(); 22 | s->tag = SEMA_INC; 23 | s->semaId = semaId; 24 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 25 | } 26 | 27 | inline void semaDec(int semaId) 28 | { 29 | Stmt* s = mkStmt(); 30 | s->tag = SEMA_DEC; 31 | s->semaId = semaId; 32 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 33 | } 34 | 35 | //============================================================================= 36 | // Receive, request, store operations 37 | //============================================================================= 38 | 39 | inline void gatherExpr(Expr* e) 40 | { 41 | Var v; v.tag = TMU0_ADDR; 42 | Stmt* s = mkAssign(mkVar(v), e); 43 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 44 | } 45 | 46 | template inline void gather(PtrExpr addr) 47 | { gatherExpr(addr.expr); } 48 | 49 | template inline void gather(Ptr& addr) 50 | { gatherExpr(addr.expr); } 51 | 52 | inline void receiveExpr(Expr* e) 53 | { 54 | Stmt* s = mkStmt(); 55 | s->tag = LOAD_RECEIVE; 56 | s->loadDest = e; 57 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 58 | } 59 | 60 | inline void receive(Int& dest) 61 | { receiveExpr(dest.expr); } 62 | 63 | inline void receive(Float& dest) 64 | { receiveExpr(dest.expr); } 65 | 66 | template inline void receive(Ptr& dest) 67 | { receiveExpr(dest.expr); } 68 | 69 | inline void storeExpr(Expr* e0, Expr* e1) 70 | { 71 | Stmt* s = mkStmt(); 72 | s->tag = STORE_REQUEST; 73 | 
s->storeReq.data = e0; 74 | s->storeReq.addr = e1; 75 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 76 | } 77 | 78 | inline void store(IntExpr data, PtrExpr addr) 79 | { storeExpr(data.expr, addr.expr); } 80 | 81 | inline void store(FloatExpr data, PtrExpr addr) 82 | { storeExpr(data.expr, addr.expr); } 83 | 84 | inline void store(IntExpr data, Ptr &addr) 85 | { storeExpr(data.expr, addr.expr); } 86 | 87 | inline void store(FloatExpr data, Ptr &addr) 88 | { storeExpr(data.expr, addr.expr); } 89 | 90 | inline void flush() 91 | { 92 | Stmt* s = mkStmt(); 93 | s->tag = FLUSH; 94 | stmtStack.replace(mkSeq(stmtStack.top(), s)); 95 | } 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /Lib/Source/Syntax.cpp: -------------------------------------------------------------------------------- 1 | #include "Source/Syntax.h" 2 | #include "Common/Heap.h" 3 | #include "Common/Stack.h" 4 | #include "Params.h" 5 | 6 | // ============================================================================ 7 | // Globals 8 | // ============================================================================ 9 | 10 | // Used for fresh variable generation 11 | static int globalVarId = 0; 12 | 13 | // Used for constructing abstract syntax trees 14 | Heap astHeap("abstract syntax tree", AST_HEAP_SIZE); 15 | Stack stmtStack; 16 | Stack controlStack; 17 | 18 | // ============================================================================ 19 | // Functions on global variables 20 | // ============================================================================ 21 | 22 | // Obtain a fresh variable 23 | Var freshVar() 24 | { 25 | // Return a fresh standard variable 26 | Var v; 27 | v.tag = STANDARD; 28 | v.id = globalVarId++; 29 | return v; 30 | } 31 | 32 | // Number of fresh vars 33 | int getFreshVarCount() 34 | { 35 | return globalVarId; 36 | } 37 | 38 | // Reset fresh variable generator 39 | void resetFreshVarGen() 40 | { 41 | globalVarId = 0; 42 
| } 43 | 44 | // Reset fresh variable generator to specified value 45 | void resetFreshVarGen(int val) 46 | { 47 | globalVarId = val; 48 | } 49 | 50 | // ============================================================================ 51 | // Functions on operators 52 | // ============================================================================ 53 | 54 | Op mkOp(OpId op, BaseType type) { 55 | Op o; 56 | o.op = op; 57 | o.type = type; 58 | return o; 59 | } 60 | 61 | CmpOp mkCmpOp(CmpOpId op, BaseType type) { 62 | CmpOp o; 63 | o.op = op; 64 | o.type = type; 65 | return o; 66 | } 67 | 68 | // Is operator unary? 69 | bool isUnary(Op op) 70 | { 71 | return (op.op == BNOT || op.op == ItoF || op.op == FtoI); 72 | } 73 | 74 | // Is given operator commutative? 75 | bool isCommutative(Op op) 76 | { 77 | if (op.type != FLOAT) { 78 | return op.op == ADD 79 | || op.op == MUL 80 | || op.op == BOR 81 | || op.op == BAND 82 | || op.op == BXOR 83 | || op.op == MIN 84 | || op.op == MAX; 85 | } 86 | return false; 87 | } 88 | 89 | // ============================================================================ 90 | // Functions on expressions 91 | // ============================================================================ 92 | 93 | // Function to allocate an expression 94 | Expr* mkExpr() 95 | { 96 | return astHeap.alloc(); 97 | } 98 | 99 | // Make an integer literal 100 | Expr* mkIntLit(int lit) 101 | { 102 | Expr* e = mkExpr(); 103 | e->tag = INT_LIT; 104 | e->intLit = lit; 105 | return e; 106 | } 107 | 108 | // Make a float literal 109 | Expr* mkFloatLit(float lit) 110 | { 111 | Expr* e = mkExpr(); 112 | e->tag = FLOAT_LIT; 113 | e->floatLit = lit; 114 | return e; 115 | } 116 | 117 | // Make a variable 118 | Expr* mkVar(Var var) 119 | { 120 | Expr* e = mkExpr(); 121 | e->tag = VAR; 122 | e->var = var; 123 | return e; 124 | } 125 | 126 | // Make an operator application 127 | Expr* mkApply(Expr* lhs, Op op, Expr* rhs) 128 | { 129 | Expr* e = mkExpr(); 130 | e->tag = APPLY; 131 | 
e->apply.lhs = lhs; 132 | e->apply.op = op; 133 | e->apply.rhs = rhs; 134 | return e; 135 | } 136 | 137 | // Make a pointer dereference 138 | Expr* mkDeref(Expr* ptr) 139 | { 140 | Expr* e = mkExpr(); 141 | e->tag = DEREF; 142 | e->deref.ptr = ptr; 143 | return e; 144 | } 145 | 146 | // Is an expression a literal? 147 | bool isLit(Expr* e) 148 | { 149 | return (e->tag == INT_LIT) || (e->tag == FLOAT_LIT); 150 | } 151 | 152 | // ============================================================================ 153 | // Functions on boolean expressions 154 | // ============================================================================ 155 | 156 | // Allocate a boolean expression 157 | BExpr* mkBExpr() 158 | { 159 | return astHeap.alloc(); 160 | } 161 | 162 | BExpr* mkNot(BExpr* neg) 163 | { 164 | BExpr *b = mkBExpr(); 165 | b->tag = NOT; 166 | b->neg = neg; 167 | return b; 168 | } 169 | 170 | BExpr* mkAnd(BExpr* lhs, BExpr* rhs) 171 | { 172 | BExpr *b = mkBExpr(); 173 | b->tag = AND; 174 | b->conj.lhs = lhs; 175 | b->conj.rhs = rhs; 176 | return b; 177 | } 178 | 179 | BExpr* mkOr(BExpr* lhs, BExpr* rhs) 180 | { 181 | BExpr *b = mkBExpr(); 182 | b->tag = OR; 183 | b->disj.lhs = lhs; 184 | b->disj.rhs = rhs; 185 | return b; 186 | } 187 | 188 | BExpr* mkCmp(Expr* lhs, CmpOp op, Expr* rhs) 189 | { 190 | BExpr *b = mkBExpr(); 191 | b->tag = CMP; 192 | b->cmp.lhs = lhs; 193 | b->cmp.op = op; 194 | b->cmp.rhs = rhs; 195 | return b; 196 | } 197 | 198 | // ============================================================================ 199 | // Functions on conditionals 200 | // ============================================================================ 201 | 202 | CExpr* mkCExpr() 203 | { 204 | return astHeap.alloc(); 205 | } 206 | 207 | CExpr* mkAll(BExpr* bexpr) 208 | { 209 | CExpr* c = mkCExpr(); 210 | c->tag = ALL; 211 | c->bexpr = bexpr; 212 | return c; 213 | } 214 | 215 | CExpr* mkAny(BExpr* bexpr) 216 | { 217 | CExpr* c = mkCExpr(); 218 | c->tag = ANY; 219 | c->bexpr = 
bexpr; 220 | return c; 221 | } 222 | 223 | // ============================================================================ 224 | // Functions on statements 225 | // ============================================================================ 226 | 227 | // Functions to allocate a statement 228 | Stmt* mkStmt() 229 | { 230 | return astHeap.alloc(); 231 | } 232 | 233 | // Make a skip statement 234 | Stmt* mkSkip() 235 | { 236 | Stmt* s = mkStmt(); 237 | s->tag = SKIP; 238 | return s; 239 | } 240 | 241 | // Make an assignment statement 242 | Stmt* mkAssign(Expr* lhs, Expr* rhs) 243 | { 244 | Stmt* s = mkStmt(); 245 | s->tag = ASSIGN; 246 | s->assign.lhs = lhs; 247 | s->assign.rhs = rhs; 248 | return s; 249 | } 250 | 251 | // Make a sequential composition 252 | Stmt* mkSeq(Stmt *s0, Stmt* s1) 253 | { 254 | Stmt* s = mkStmt(); 255 | s->tag = SEQ; 256 | s->seq.s0 = s0; 257 | s->seq.s1 = s1; 258 | return s; 259 | } 260 | 261 | Stmt* mkWhere(BExpr* cond, Stmt* thenStmt, Stmt* elseStmt) 262 | { 263 | Stmt* s = mkStmt(); 264 | s->tag = WHERE; 265 | s->where.cond = cond; 266 | s->where.thenStmt = thenStmt; 267 | s->where.elseStmt = elseStmt; 268 | return s; 269 | } 270 | 271 | Stmt* mkIf(CExpr* cond, Stmt* thenStmt, Stmt* elseStmt) 272 | { 273 | Stmt* s = mkStmt(); 274 | s->tag = IF; 275 | s->ifElse.cond = cond; 276 | s->ifElse.thenStmt = thenStmt; 277 | s->ifElse.elseStmt = elseStmt; 278 | return s; 279 | } 280 | 281 | Stmt* mkWhile(CExpr* cond, Stmt* body) 282 | { 283 | Stmt* s = mkStmt(); 284 | s->tag = WHILE; 285 | s->loop.cond = cond; 286 | s->loop.body = body; 287 | return s; 288 | } 289 | 290 | Stmt* mkFor(CExpr* cond, Stmt* inc, Stmt* body) 291 | { 292 | Stmt* s = mkStmt(); 293 | s->tag = FOR; 294 | s->forLoop.cond = cond; 295 | s->forLoop.inc = inc; 296 | s->forLoop.body = body; 297 | return s; 298 | } 299 | 300 | Stmt* mkPrint(PrintTag t, Expr* e) 301 | { 302 | Stmt* s = mkStmt(); 303 | s->tag = PRINT; 304 | s->print.tag = t; 305 | s->print.expr = e; 306 | return s; 
307 | } 308 | -------------------------------------------------------------------------------- /Lib/Source/Syntax.h: -------------------------------------------------------------------------------- 1 | // This module defines the abstract syntax of the QPU language. 2 | 3 | #ifndef _SOURCE_SYNTAX_H_ 4 | #define _SOURCE_SYNTAX_H_ 5 | 6 | #include "Common/Heap.h" 7 | #include "Common/Stack.h" 8 | 9 | // ============================================================================ 10 | // Operators 11 | // ============================================================================ 12 | 13 | // Operator id 14 | // (Note: order of operators is important to the random generator.) 15 | enum OpId { 16 | // Int & Float operators: 17 | ROTATE, ADD, SUB, MUL, MIN, MAX, 18 | 19 | // Int only operators: 20 | SHL, SHR, USHR, BOR, BAND, BXOR, BNOT, ROR, 21 | 22 | // Conversion operators: 23 | ItoF, FtoI 24 | }; 25 | 26 | // Every operator has a type associated with it 27 | enum BaseType { UINT8, INT16, INT32, FLOAT }; 28 | 29 | // Pair containing operator and base type 30 | struct Op { OpId op; BaseType type; }; 31 | 32 | // Construct an 'Op' 33 | Op mkOp(OpId op, BaseType type); 34 | 35 | // Is operator unary? 36 | bool isUnary(Op op); 37 | 38 | // Is operator commutative? 39 | bool isCommutative(Op op); 40 | 41 | // ============================================================================ 42 | // Variables 43 | // ============================================================================ 44 | 45 | // What kind of variable is it 46 | enum VarTag { 47 | STANDARD // A standard variable that can be stored 48 | // in a general-purpose register on a QPU 49 | , UNIFORM // (Read-only.) Reading this variable will consume a value 50 | // (replicated 16 times) from the QPU's UNIFORM FIFO 51 | // (this is how parameters are passed to kernels). 52 | , QPU_NUM // (Read-only.) Reading this variable will yield the 53 | // QPU's unique id (replicated 16 times). 
54 | , ELEM_NUM // (Read-only.) Reading this variable will yield a vector 55 | // containing the integers from 0 to 15. 56 | , TMU0_ADDR // (Write-only.) Initiate load via TMU 57 | }; 58 | 59 | typedef int VarId; 60 | 61 | struct Var { 62 | VarTag tag; 63 | 64 | // A unique identifier for a standard variable 65 | VarId id; 66 | }; 67 | 68 | // Reserved general-purpose vars 69 | enum ReservedVarId { 70 | RSV_QPU_ID = 0, 71 | RSV_NUM_QPUS = 1, 72 | RSV_READ_STRIDE = 2, 73 | RSV_WRITE_STRIDE = 3 74 | }; 75 | 76 | // ============================================================================ 77 | // Expressions 78 | // ============================================================================ 79 | 80 | // What kind of expression is it? 81 | enum ExprTag { INT_LIT, FLOAT_LIT, VAR, APPLY, DEREF }; 82 | 83 | struct Expr { 84 | // What kind of expression is it? 85 | ExprTag tag; 86 | 87 | union { 88 | // Integer literal 89 | int intLit; 90 | 91 | // Float literal 92 | float floatLit; 93 | 94 | // Variable identifier 95 | Var var; 96 | 97 | // Application of a binary operator 98 | struct { Expr* lhs; Op op; Expr* rhs; } apply; 99 | 100 | // Dereference a pointer 101 | struct { Expr* ptr; } deref; 102 | }; 103 | }; 104 | 105 | // Functions to construct expressions 106 | Expr* mkExpr(); 107 | Expr* mkIntLit(int lit); 108 | Expr* mkFloatLit(float lit); 109 | Expr* mkVar(Var var); 110 | Expr* mkApply(Expr* lhs, Op op, Expr* rhs); 111 | Expr* mkDeref(Expr* ptr); 112 | 113 | // Is an expression a literal? 
114 | bool isLit(Expr* e); 115 | 116 | // ============================================================================ 117 | // Comparison operators 118 | // ============================================================================ 119 | 120 | // Comparison operators 121 | enum CmpOpId { EQ, NEQ, LT, GT, LE, GE }; 122 | 123 | // Pair containing comparison operator and base type 124 | struct CmpOp { CmpOpId op; BaseType type; }; 125 | 126 | // Construct an 'Op' 127 | CmpOp mkCmpOp(CmpOpId op, BaseType type); 128 | 129 | // ============================================================================ 130 | // Boolean expressions 131 | // ============================================================================ 132 | 133 | // Kinds of boolean expressions 134 | enum BExprTag { NOT, AND, OR, CMP }; 135 | 136 | struct BExpr { 137 | // What kind of boolean expression is it? 138 | BExprTag tag; 139 | 140 | union { 141 | // Negation 142 | BExpr* neg; 143 | 144 | // Conjunction 145 | struct { BExpr* lhs; BExpr* rhs; } conj; 146 | 147 | // Disjunction 148 | struct { BExpr* lhs; BExpr* rhs; } disj; 149 | 150 | // Comparison 151 | struct { Expr* lhs; CmpOp op; Expr* rhs; } cmp; 152 | }; 153 | }; 154 | 155 | // Functions to construct boolean expressions 156 | BExpr* mkBExpr(); 157 | BExpr* mkNot(BExpr* neg); 158 | BExpr* mkAnd(BExpr* lhs, BExpr* rhs); 159 | BExpr* mkOr (BExpr* lhs, BExpr* rhs); 160 | BExpr* mkCmp(Expr* lhs, CmpOp op, Expr* rhs); 161 | 162 | // ============================================================================ 163 | // Conditional expressions 164 | // ============================================================================ 165 | 166 | // Kinds of conditional expressions 167 | enum CExprTag { ALL, ANY }; 168 | 169 | struct CExpr { 170 | // What kind of boolean expression is it? 171 | CExprTag tag; 172 | 173 | // This is either a scalar boolean expression, or a reduction of a vector 174 | // boolean expressions using 'any' or 'all' operators. 
175 | BExpr* bexpr; 176 | }; 177 | 178 | // Functions to construct conditional expressions 179 | CExpr* mkCExpr(); 180 | CExpr* mkAll(BExpr* bexpr); 181 | CExpr* mkAny(BExpr* bexpr); 182 | 183 | // ============================================================================ 184 | // 'print' statements 185 | // ============================================================================ 186 | 187 | // For displaying values in emulation 188 | enum PrintTag { PRINT_INT, PRINT_FLOAT, PRINT_STR }; 189 | 190 | struct PrintStmt { 191 | PrintTag tag; 192 | union { 193 | const char* str; 194 | Expr* expr; 195 | }; 196 | }; 197 | 198 | // ============================================================================ 199 | // Statements 200 | // ============================================================================ 201 | 202 | // What kind of statement is it? 203 | enum StmtTag { 204 | SKIP, ASSIGN, SEQ, WHERE, 205 | IF, WHILE, PRINT, FOR, 206 | SET_READ_STRIDE, SET_WRITE_STRIDE, 207 | LOAD_RECEIVE, STORE_REQUEST, FLUSH, 208 | SEND_IRQ_TO_HOST, SEMA_INC, SEMA_DEC }; 209 | 210 | struct Stmt { 211 | // What kind of statement is it? 
212 | StmtTag tag; 213 | 214 | union { 215 | // Assignment 216 | struct { Expr* lhs; Expr* rhs; } assign; 217 | 218 | // Sequential composition 219 | struct { Stmt* s0; Stmt* s1; } seq; 220 | 221 | // Where 222 | struct { BExpr* cond; Stmt* thenStmt; Stmt* elseStmt; } where; 223 | 224 | // If 225 | struct { CExpr* cond; Stmt* thenStmt; Stmt* elseStmt; } ifElse; 226 | 227 | // While 228 | struct { CExpr* cond; Stmt* body; } loop; 229 | 230 | // For (only used intermediately during AST construction) 231 | struct { CExpr* cond; Stmt* inc; Stmt* body; } forLoop; 232 | 233 | // Print 234 | PrintStmt print; 235 | 236 | // Set stride 237 | Expr* stride; 238 | 239 | // Load receive destination 240 | Expr* loadDest; 241 | 242 | // Store request 243 | struct { Expr* data; Expr* addr; } storeReq; 244 | 245 | // Semaphore id for increment / decrement 246 | int semaId; 247 | }; 248 | }; 249 | 250 | // Functions to construct statements 251 | Stmt* mkStmt(); 252 | Stmt* mkSkip(); 253 | Stmt* mkAssign(Expr* lhs, Expr* rhs); 254 | Stmt* mkSeq(Stmt* s0, Stmt* s1); 255 | Stmt* mkWhere(BExpr* cond, Stmt* thenStmt, Stmt* elseStmt); 256 | Stmt* mkIf(CExpr* cond, Stmt* thenStmt, Stmt* elseStmt); 257 | Stmt* mkWhile(CExpr* cond, Stmt* body); 258 | Stmt* mkFor(CExpr* cond, Stmt* inc, Stmt* body); 259 | Stmt* mkPrint(PrintTag t, Expr* e); 260 | 261 | // ============================================================================ 262 | // Global variables 263 | // ============================================================================ 264 | 265 | // Obtain a fresh variable 266 | Var freshVar(); 267 | 268 | // Number of fresh vars used 269 | int getFreshVarCount(); 270 | 271 | // Reset fresh variable generator 272 | void resetFreshVarGen(); 273 | void resetFreshVarGen(int val); 274 | 275 | // Used for constructing abstract syntax trees 276 | extern Heap astHeap; 277 | extern Stack stmtStack; 278 | extern Stack controlStack; 279 | 280 | #endif 281 | 
-------------------------------------------------------------------------------- /Lib/Source/Translate.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRANSLATE_H_ 2 | #define _TRANSLATE_H_ 3 | 4 | #include "Common/Seq.h" 5 | #include "Source/Syntax.h" 6 | #include "Target/Syntax.h" 7 | 8 | void insertEndCode(Seq* seq); 9 | void translateStmt(Seq* seq, Stmt* s); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /Lib/Target/CFG.cpp: -------------------------------------------------------------------------------- 1 | // Control-flow graphs (CFGs) 2 | 3 | #include 4 | #include "Target/CFG.h" 5 | #include "Target/Syntax.h" 6 | 7 | // ============================================================================ 8 | // Build control-flow graph 9 | // ============================================================================ 10 | 11 | // Build a CFG for a given instruction sequence. 12 | 13 | void buildCFG(Seq* instrs, CFG* cfg) 14 | { 15 | // ---------- 16 | // First pass 17 | // ---------- 18 | // 19 | // 1. Each instruction is a successor of the previous 20 | // instruction, unless the previous instruction 21 | // is an unconditional jump or halt instruction. 22 | // 23 | // 2. Compute a mapping from labels to instruction ids. 24 | 25 | // Number of labels in program 26 | int numLabels = getFreshLabelCount(); 27 | 28 | // Mapping from labels to instruction ids 29 | InstrId* labelMap = new InstrId [numLabels]; 30 | 31 | // Initialise label mapping 32 | for (int i = 0; i < numLabels; i++) 33 | labelMap[i] = -1; 34 | 35 | for (int i = 0; i < instrs->numElems; i++) { 36 | // Get instruction 37 | Instr instr = instrs->elems[i]; 38 | 39 | // Is it an unconditional jump? 40 | bool uncond = instr.tag == BRL && instr.BRL.cond.tag == COND_ALWAYS; 41 | 42 | // Is it a final instruction? 
43 | bool end = instr.tag == END || i+1 == instrs->numElems; 44 | 45 | // Add successor 46 | cfg->extend(); 47 | if (! (uncond || end)) 48 | cfg->elems[i].insert(i+1); 49 | 50 | // Remember location of each label 51 | if (instr.tag == LAB) { 52 | assert(instr.label >= 0 && instr.label < numLabels); 53 | labelMap[instr.label] = i; 54 | } 55 | } 56 | 57 | // ----------- 58 | // Second pass 59 | // ----------- 60 | // 61 | // Add a successor for each conditional jump. 62 | 63 | for (int i = 0; i < instrs->numElems; i++) { 64 | Instr instr = instrs->elems[i]; 65 | if (instr.tag == BRL) { 66 | assert(labelMap[instr.BRL.label] >= 0); 67 | cfg->elems[i].insert(labelMap[instr.BRL.label]); 68 | } 69 | } 70 | 71 | // Free memory 72 | delete [] labelMap; 73 | } 74 | 75 | // ============================================================================ 76 | // Reverse the arrows in a CFG 77 | // ============================================================================ 78 | 79 | // Given a mapping from instruction ids to successors, produce a 80 | // mapping from instruction ids to predecessors. 81 | 82 | void reverseCFG(CFG* succs, CFG* preds) 83 | { 84 | int n = succs->numElems; 85 | 86 | // Make preds the same size as succs 87 | preds->setCapacity(n); 88 | preds->numElems = n; 89 | 90 | for (int i = 0; i < n; i++) { 91 | Succs* s = &succs->elems[i]; 92 | for (int j = 0; j < s->numElems; j++) { 93 | InstrId succ = s->elems[j]; 94 | preds->elems[succ].insert(i); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /Lib/Target/CFG.h: -------------------------------------------------------------------------------- 1 | // Control-flow graphs (CFGs) 2 | 3 | #ifndef _CFG_H_ 4 | #define _CFG_H_ 5 | 6 | #include "Common/Seq.h" 7 | #include "Target/Syntax.h" 8 | 9 | // A set of successors. 10 | 11 | typedef SmallSeq Succs; 12 | 13 | // A CFG is simply a set of successors 14 | // for each instruction. 
15 | 16 | typedef Seq CFG; 17 | 18 | // Function to construct a CFG. 19 | 20 | void buildCFG(Seq* instrs, CFG* cfg); 21 | 22 | // Function to reverse the arrows in a CFG. 23 | 24 | void reverseCFG(CFG* succs, CFG* preds); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /Lib/Target/Emulator.h: -------------------------------------------------------------------------------- 1 | #ifndef _EMULATOR_H_ 2 | #define _EMULATOR_H_ 3 | 4 | #include 5 | #include "Common/Seq.h" 6 | #include "Target/Syntax.h" 7 | 8 | #define VPM_SIZE 2048 9 | #define NUM_LANES 16 10 | #define MAX_QPUS 12 11 | #define EMULATOR_HEAP_SIZE 65536 12 | 13 | // This is a type for representing the values in a vector 14 | union Word { 15 | int32_t intVal; 16 | float floatVal; 17 | }; 18 | 19 | // Vector values 20 | struct Vec { 21 | Word elems[NUM_LANES]; 22 | }; 23 | 24 | // In-flight DMA requests 25 | struct DMAReq { 26 | bool active; 27 | Word addr; 28 | BufferAorB buffer; 29 | }; 30 | 31 | // VPM load queue (max 2 elements) 32 | struct VPMLoadQueue { 33 | int addrs[3]; 34 | int front, back; 35 | }; 36 | 37 | // State of a single QPU. 38 | struct QPUState { 39 | int id; // QPU id 40 | int numQPUs; // QPU count 41 | bool running; // Is QPU active, or has it halted? 42 | int pc; // Program counter 43 | Vec* regFileA; // Register file A 44 | int sizeRegFileA; // (and size) 45 | Vec* regFileB; // Register file B 46 | int sizeRegFileB; // (and size) 47 | Vec accum[6]; // Accumulator registers 48 | bool negFlags[NUM_LANES]; // Negative flags 49 | bool zeroFlags[NUM_LANES]; // Zero flags 50 | int nextUniform; // Pointer to next uniform to read 51 | DMAReq dmaLoad; // In-flight DMA load 52 | DMAReq dmaStore; // In-flight DMA store 53 | VPMLoadQueue vpmLoadQueue; // VPM load queue 54 | int readStride; // Read stride 55 | int writeStride; // Write stride 56 | SmallSeq* loadBuffer; // Load buffer for loads via TMU 57 | }; 58 | 59 | // State of the VideoCore. 
60 | struct State { 61 | QPUState qpu[MAX_QPUS]; // State of each QPU 62 | Word vpm[VPM_SIZE]; // Shared VPM memory 63 | Seq* output; // Output for print statements 64 | int sema[16]; // Semaphores 65 | }; 66 | 67 | // Emulator 68 | void emulate 69 | ( int numQPUs // Number of QPUs active 70 | , Seq* instrs // Instruction sequence 71 | , int maxReg // Max reg id used 72 | , Seq* uniforms // Kernel parameters 73 | , Seq* output // Output from print statements 74 | // (if NULL, stdout is used) 75 | ); 76 | 77 | // Heap used in emulation mode. 78 | extern uint32_t emuHeapEnd; 79 | extern int32_t* emuHeap; 80 | 81 | // Rotate a vector 82 | Vec rotate(Vec v, int n); 83 | 84 | // Printing routines 85 | void emitChar(Seq* out, char c); 86 | void emitStr(Seq* out, const char* s); 87 | void printIntVec(Seq* out, Vec x); 88 | void printFloatVec(Seq* out, Vec x); 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /Lib/Target/Encode.h: -------------------------------------------------------------------------------- 1 | #ifndef _ENCODE_H_ 2 | #define _ENCODE_H_ 3 | 4 | #include 5 | #include "Target/Syntax.h" 6 | #include "Common/Seq.h" 7 | 8 | void encode(Seq* instrs, Seq* code); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /Lib/Target/LiveRangeSplit.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Source/Syntax.h" 3 | #include "Target/ReachingDefs.h" 4 | #include "Target/Subst.h" 5 | #include "Target/Liveness.h" 6 | 7 | // ============================================================================ 8 | // Live-range splitter 9 | // ============================================================================ 10 | 11 | // First, a helper function that renames the variable v defined by an 12 | // instruction to w, along with all uses of that variable reached-by 13 | // the instruction, and recursively all 
definitions of that variable 14 | // that reach one of these uses. 15 | 16 | void renameDef(Seq* instrs, 17 | InstrId i, 18 | RegId v, 19 | RegId w, 20 | bool* visited, 21 | ReachingDefs* reachedBy, 22 | DefsOf* defsOf) 23 | { 24 | // If we haven't previously visited instruction i then proceeed 25 | if (visited[i]) return; 26 | visited[i] = true; 27 | 28 | // Rename destination register from v to w 29 | Instr* instr = &instrs->elems[i]; 30 | renameDest(instr, REG_A, v, REG_B, w); 31 | 32 | ReachSet* reached = &reachedBy->elems[i]; 33 | // For each instruction reached by i 34 | for (int j = 0; j < reached->numElems; j++) { 35 | InstrId rid = reached->elems[j]; 36 | Instr* r = &instrs->elems[rid]; 37 | 38 | // Rename uses of v to w 39 | renameUses(r, REG_A, v, REG_B, w); 40 | 41 | // For each instruction d defining v 42 | SmallSeq* ds = &defsOf->elems[v]; 43 | for (int k = 0; k < ds->numElems; k++) { 44 | InstrId d = ds->elems[k]; 45 | // If r is reached-by d 46 | if (reachedBy->elems[d].member(rid) 47 | || (d == rid && isCondAssign(r))) 48 | // Recursively modify definition d to define w 49 | renameDef(instrs, d, v, w, visited, reachedBy, defsOf); 50 | } 51 | } 52 | } 53 | 54 | // Now for the top-level routine. 
55 | 56 | void liveRangeSplit(Seq* instrs, CFG* cfg) 57 | { 58 | // Determine for each variable, the instructions that assign to it 59 | DefsOf defsOf; 60 | computeDefsOf(instrs, &defsOf); 61 | 62 | // Determine instructions reached by each definition 63 | ReachingDefs reachedBy; 64 | computeReachedBy(instrs, cfg, &reachedBy); 65 | 66 | // Keep track of which instructions we've visisted 67 | bool* visited = new bool [instrs->numElems]; 68 | 69 | // Initialise visited array 70 | for (int i = 0; i < instrs->numElems; i++) 71 | visited[i] = false; 72 | 73 | // Unique register id 74 | RegId next = 0; 75 | 76 | for (int i = 0; i < instrs->numElems; i++) 77 | if (!visited[i]) { 78 | // Compute vars defined by instruction 79 | UseDef set; 80 | useDef(instrs->elems[i], &set); 81 | 82 | // For each var defined by instruction 83 | for (int j = 0; j < set.def.numElems; j++) 84 | renameDef(instrs, i, set.def.elems[j], next++, 85 | visited, &reachedBy, &defsOf); 86 | } 87 | 88 | // Every instruction should now soley use register file B. 89 | // Go through and make them use register file A instead. 
90 | for (int i = 0; i < instrs->numElems; i++) 91 | substRegTag(&instrs->elems[i], REG_B, REG_A); 92 | 93 | // Update fresh var counter 94 | resetFreshVarGen(next); 95 | 96 | // Free memory 97 | delete [] visited; 98 | } 99 | -------------------------------------------------------------------------------- /Lib/Target/LiveRangeSplit.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIVERANGESPLIT_H_ 2 | #define _LIVERANGESPLIT_H_ 3 | 4 | #include "Common/Seq.h" 5 | #include "Target/CFG.h" 6 | #include "Target/Syntax.h" 7 | 8 | void liveRangeSplit(Seq* instrs, CFG* cfg); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /Lib/Target/Liveness.cpp: -------------------------------------------------------------------------------- 1 | // Liveness analysis 2 | 3 | #include "Target/Liveness.h" 4 | 5 | // ============================================================================ 6 | // Compute 'use' and 'def' sets 7 | // ============================================================================ 8 | 9 | // 'use' set: the variables read by an instruction 10 | // 'def' set: the variables modified by an instruction 11 | 12 | // Compute 'use' and 'def' sets for a given instruction 13 | 14 | void useDefReg(Instr instr, UseDefReg* useDef) 15 | { 16 | // Make the 'use' and 'def' sets empty 17 | useDef->use.clear(); 18 | useDef->def.clear(); 19 | 20 | switch (instr.tag) { 21 | // Load immediate 22 | case LI: 23 | // Add destination reg to 'def' set 24 | useDef->def.insert(instr.LI.dest); 25 | 26 | // Add destination reg to 'use' set if conditional assigment 27 | if (instr.LI.cond.tag != ALWAYS) 28 | useDef->use.insert(instr.LI.dest); 29 | return; 30 | 31 | // ALU operation 32 | case ALU: 33 | // Add destination reg to 'def' set 34 | useDef->def.insert(instr.ALU.dest); 35 | 36 | // Add destination reg to 'use' set if conditional assigment 37 | if (instr.ALU.cond.tag != ALWAYS) 38 | 
useDef->use.insert(instr.ALU.dest); 39 | 40 | // Add source reg A to 'use' set 41 | if (instr.ALU.srcA.tag == REG) 42 | useDef->use.insert(instr.ALU.srcA.reg); 43 | 44 | // Add source reg B to 'use' set 45 | if (instr.ALU.srcB.tag == REG) 46 | useDef->use.insert(instr.ALU.srcB.reg); 47 | return; 48 | 49 | // LD1 instruction 50 | case LD1: 51 | // Add source reg to 'use' set 52 | useDef->use.insert(instr.LD1.addr); 53 | return; 54 | 55 | // LD4 instruction 56 | case LD4: 57 | // Add dest reg to 'def' set 58 | useDef->def.insert(instr.LD4.dest); 59 | return; 60 | 61 | // ST1 instruction 62 | case ST1: 63 | // Add source reg to 'use' set 64 | useDef->use.insert(instr.ST1.data); 65 | return; 66 | 67 | // ST2 instruction 68 | case ST2: 69 | // Add source reg to 'use' set 70 | useDef->use.insert(instr.ST2.addr); 71 | return; 72 | 73 | // Print integer instruction 74 | case PRI: 75 | // Add source reg to 'use' set 76 | useDef->use.insert(instr.PRI); 77 | return; 78 | 79 | // Print float instruction 80 | case PRF: 81 | // Add source reg to 'use' set 82 | useDef->use.insert(instr.PRF); 83 | return; 84 | 85 | // Load receive instruction 86 | case RECV: 87 | // Add dest reg to 'def' set 88 | useDef->def.insert(instr.RECV.dest); 89 | return; 90 | } 91 | } 92 | 93 | // Same function as above, except only yeilds ids of registers in 94 | // register file A. 95 | 96 | void useDef(Instr instr, UseDef* out) 97 | { 98 | UseDefReg set; 99 | useDefReg(instr, &set); 100 | out->use.clear(); 101 | out->def.clear(); 102 | for (int i = 0; i < set.use.numElems; i++) { 103 | Reg r = set.use.elems[i]; 104 | if (r.tag == REG_A) out->use.append(r.regId); 105 | } 106 | for (int i = 0; i < set.def.numElems; i++) { 107 | Reg r = set.def.elems[i]; 108 | if (r.tag == REG_A) out->def.append(r.regId); 109 | } 110 | } 111 | 112 | // Compute the union of the 'use' sets of the successors of a given 113 | // instruction. 
114 | 115 | void useSetOfSuccs(Seq* instrs, CFG* cfg, 116 | InstrId i, SmallSeq* use) 117 | { 118 | use->clear(); 119 | Succs* s = &cfg->elems[i]; 120 | for (int j = 0; j < s->numElems; j++) { 121 | UseDef set; 122 | useDef(instrs->elems[s->elems[j]], &set); 123 | for (int k = 0; k < set.use.numElems; k++) 124 | use->insert(set.use.elems[k]); 125 | } 126 | } 127 | 128 | // Return true if given instruction has two register operands. 129 | 130 | bool getTwoUses(Instr instr, Reg* r1, Reg* r2) 131 | { 132 | if (instr.tag == ALU && instr.ALU.srcA.tag == REG 133 | && instr.ALU.srcB.tag == REG) { 134 | *r1 = instr.ALU.srcA.reg; 135 | *r2 = instr.ALU.srcB.reg; 136 | return true; 137 | } 138 | return false; 139 | } 140 | 141 | // ============================================================================ 142 | // Compute live sets for each instruction 143 | // ============================================================================ 144 | 145 | // Compute the live-out variables of an instruction, given the live-in 146 | // variables of all instructions and the CFG. 147 | 148 | void computeLiveOut(CFG* cfg, Liveness* live, InstrId i, LiveSet* liveOut) 149 | { 150 | liveOut->clear(); 151 | Succs* s = &cfg->elems[i]; 152 | for (int j = 0; j < s->numElems; j++) { 153 | LiveSet* set = &live->elems[s->elems[j]]; 154 | for (int k = 0; k < set->numElems; k++) 155 | liveOut->insert(set->elems[k]); 156 | } 157 | } 158 | 159 | void liveness(Seq* instrs, CFG* cfg, Liveness* live) 160 | { 161 | // Initialise live mapping to have one entry per instruction 162 | live->setCapacity(instrs->numElems); 163 | live->numElems = instrs->numElems; 164 | 165 | // For storing the 'use' and 'def' sets of each instruction 166 | UseDef useDefSets; 167 | 168 | // For temporarily storing live-in and live-out variables 169 | LiveSet liveIn; 170 | LiveSet liveOut; 171 | 172 | // Has a change been made to the liveness mapping? 173 | bool changed = true; 174 | 175 | // Iterate until no change, i.e. 
fixed point 176 | while (changed) { 177 | changed = false; 178 | 179 | // Propagate live variables backwards 180 | for (int i = instrs->numElems-1; i >= 0; i--) { 181 | // Compute 'use' and 'def' sets 182 | Instr instr = instrs->elems[i]; 183 | useDef(instr, &useDefSets); 184 | 185 | // Compute live-out variables 186 | computeLiveOut(cfg, live, i, &liveOut); 187 | 188 | // Remove the 'def' set from the live-out set to give live-in set 189 | liveIn.clear(); 190 | for (int j = 0; j < liveOut.numElems; j++) { 191 | if (! useDefSets.def.member(liveOut.elems[j])) 192 | liveIn.insert(liveOut.elems[j]); 193 | } 194 | 195 | // Add the 'use' set to the live-in set 196 | for (int j = 0; j < useDefSets.use.numElems; j++) 197 | liveIn.insert(useDefSets.use.elems[j]); 198 | 199 | // Insert the live-in variables into the map 200 | for (int j = 0; j < liveIn.numElems; j++) { 201 | bool inserted = live->elems[i].insert(liveIn.elems[j]); 202 | changed = changed || inserted; 203 | } 204 | } 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /Lib/Target/Liveness.h: -------------------------------------------------------------------------------- 1 | // Liveness analysis 2 | 3 | #ifndef _LIVENESS_H_ 4 | #define _LIVENESS_H_ 5 | 6 | #include "Common/Seq.h" 7 | #include "Target/Syntax.h" 8 | #include "Target/CFG.h" 9 | 10 | // 'use' and 'def' sets: 11 | // * 'use' set: the variables read by an instruction 12 | // * 'def' set: the variables modified by an instruction 13 | 14 | struct UseDefReg { 15 | SmallSeq use; 16 | SmallSeq def; 17 | }; 18 | 19 | struct UseDef { 20 | SmallSeq use; 21 | SmallSeq def; 22 | }; 23 | 24 | // Compute 'use' and 'def' sets for a given instruction 25 | 26 | void useDefReg(Instr instr, UseDefReg* out); 27 | void useDef(Instr instr, UseDef* out); 28 | bool getTwoUses(Instr instr, Reg* r1, Reg* r2); 29 | 30 | // A live set containts the variables 31 | // that are live-in to an instruction. 
32 | 33 | typedef SmallSeq LiveSet; 34 | 35 | // The result of liveness analysis is a set 36 | // of live variables for each instruction. 37 | 38 | typedef Seq Liveness; 39 | 40 | // Determine the liveness sets for each instruction. 41 | 42 | void liveness(Seq* instrs, CFG* cfg, Liveness* liveness); 43 | void computeLiveOut(CFG* cfg, Liveness* live, InstrId i, LiveSet* liveOut); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /Lib/Target/LoadStore.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Source/Syntax.h" 3 | #include "Target/Syntax.h" 4 | #include "Target/LoadStore.h" 5 | 6 | // ============================================================================= 7 | // Stride setup 8 | // ============================================================================= 9 | 10 | // Generate instructions to set the read stride. 11 | 12 | void genSetReadStride(Seq* instrs, int stride) 13 | { 14 | int pitch = (stride+1)*4; 15 | assert(pitch < 8192); 16 | int setup = 0x90000000 | pitch; 17 | Reg dst; dst.tag = REG_A; dst.regId = RSV_READ_STRIDE; 18 | Instr instr = genLI(dst, setup); 19 | instrs->append(instr); 20 | } 21 | 22 | void genSetReadStride(Seq* instrs, Reg stride) 23 | { 24 | Reg pitch = freshReg(); 25 | Reg tmp = freshReg(); 26 | instrs->append(genIncr(pitch, stride, 1)); 27 | instrs->append(genLI(tmp, 0x90000000)); 28 | instrs->append(genLShift(pitch, pitch, 2)); 29 | 30 | Reg dst; dst.tag = REG_A; dst.regId = RSV_READ_STRIDE; 31 | instrs->append(genOR(dst, tmp, pitch)); 32 | } 33 | 34 | // Generate instructions to set the write stride. 
35 | 36 | void genSetWriteStride(Seq* instrs, int stride) 37 | { 38 | int strideBytes = stride*4; 39 | assert(strideBytes < 8192); 40 | int setup = 0xc0010000 | strideBytes; 41 | Reg dst; dst.tag = REG_A; dst.regId = RSV_WRITE_STRIDE; 42 | Instr instr = genLI(dst, setup); 43 | instrs->append(instr); 44 | } 45 | 46 | void genSetWriteStride(Seq* instrs, Reg stride) 47 | { 48 | Reg tmp0 = freshReg(); 49 | Reg tmp1 = freshReg(); 50 | instrs->append(genLShift(tmp0, stride, 2)); 51 | instrs->append(genLI(tmp1, 0xc0010000)); 52 | 53 | Reg dst; dst.tag = REG_A; dst.regId = RSV_WRITE_STRIDE; 54 | instrs->append(genOR(dst, tmp0, tmp1)); 55 | } 56 | 57 | // ============================================================================= 58 | // DMA setup 59 | // ============================================================================= 60 | 61 | // Generate instructions to setup DMA load. 62 | 63 | void assignDMALoadSetup(Seq* instrs, Reg dst, BufferAorB b, Reg qpuId) 64 | { 65 | int setup = 0x80101800; 66 | int buffIdx = (16 * (b == A ? 0 : 1)) << 4; 67 | setup |= buffIdx; 68 | 69 | Reg tmp = freshReg(); 70 | instrs->append(genLI(tmp, setup)); 71 | instrs->append(genOR(dst, qpuId, tmp)); 72 | } 73 | 74 | // Generate instructions to setup DMA store. 75 | 76 | void assignDMAStoreSetup(Seq* instrs, Reg dst, BufferAorB b, Reg qpuId) 77 | { 78 | int setup = 0x88014000; 79 | int buffIdx = (16 * (b == A ? 2 : 3)) << 7; 80 | setup |= buffIdx; 81 | 82 | Reg tmp0 = freshReg(); 83 | instrs->append(genLI(tmp0, setup)); 84 | 85 | Reg tmp1 = freshReg(); 86 | instrs->append(genLShift(tmp1, qpuId, 3)); 87 | 88 | instrs->append(genOR(dst, tmp0, tmp1)); 89 | } 90 | 91 | // ============================================================================= 92 | // VPM setup 93 | // ============================================================================= 94 | 95 | // Generate instructions to setup VPM load. 
96 | 97 | void assignVPMLoadSetup(Seq* instrs, Reg dst, BufferAorB b, Reg qpuId) 98 | { 99 | int setup = 0x00100200; 100 | int buffIdx = (b == A ? 0 : 1) << 4; 101 | setup |= buffIdx; 102 | 103 | Reg tmp = freshReg(); 104 | instrs->append(genLI(tmp, setup)); 105 | instrs->append(genOR(dst, qpuId, tmp)); 106 | } 107 | 108 | // Generate instructions to setup VPM store. 109 | 110 | void genSetupVPMStore(Seq* instrs, BufferAorB b, Reg qpuId) 111 | { 112 | int setup = 0x00100200; 113 | int buffIdx = (b == A ? 2 : 3) << 4; 114 | setup |= buffIdx; 115 | 116 | Reg tmp = freshReg(); 117 | instrs->append(genLI(tmp, setup)); 118 | 119 | Reg dst; 120 | dst.tag = SPECIAL; 121 | dst.regId = SPECIAL_WR_SETUP; 122 | instrs->append(genOR(dst, qpuId, tmp)); 123 | } 124 | 125 | // ============================================================================ 126 | // Load/Store pass 127 | // ============================================================================ 128 | 129 | void loadStorePass(Seq* instrs) 130 | { 131 | Seq newInstrs(instrs->numElems*2); 132 | 133 | // Put QPU number in a register 134 | Reg qpuId = freshReg(); 135 | Reg qpuNum; qpuNum.tag = SPECIAL; qpuNum.regId = SPECIAL_QPU_NUM; 136 | newInstrs.append(genMove(qpuId, qpuNum)); 137 | 138 | // Initialise strides 139 | genSetReadStride(&newInstrs, 0); 140 | genSetWriteStride(&newInstrs, 0); 141 | 142 | // Initialise load/store setup registers 143 | Reg vpmLoadSetup = freshReg(); 144 | Reg dmaLoadSetup = freshReg(); 145 | Reg dmaStoreSetup = freshReg(); 146 | 147 | assignDMALoadSetup(&newInstrs, dmaLoadSetup, A, qpuId); 148 | assignDMAStoreSetup(&newInstrs, dmaStoreSetup, A, qpuId); 149 | assignVPMLoadSetup(&newInstrs, vpmLoadSetup, A, qpuId); 150 | 151 | genSetupVPMStore(&newInstrs, A, qpuId); 152 | 153 | // Elaborate LD1, LD3 and ST2 intermediate instructions 154 | Reg sp; sp.tag = SPECIAL; 155 | Reg src; src.tag = REG_A; 156 | for (int i = 0; i < instrs->numElems; i++) { 157 | Instr instr = instrs->elems[i]; 158 | 
switch (instr.tag) { 159 | case LD1: 160 | sp.regId = SPECIAL_RD_SETUP; 161 | src.regId = RSV_READ_STRIDE; 162 | newInstrs.append(genMove(sp, src)); 163 | newInstrs.append(genMove(sp, dmaLoadSetup)); 164 | sp.regId = SPECIAL_DMA_LD_ADDR; 165 | newInstrs.append(genMove(sp, instr.LD1.addr)); 166 | break; 167 | case LD3: 168 | sp.regId = SPECIAL_RD_SETUP; 169 | newInstrs.append(genMove(sp, vpmLoadSetup)); 170 | for (int j = 0; j < 3; j++) 171 | newInstrs.append(nop()); 172 | break; 173 | case ST2: 174 | sp.regId = SPECIAL_WR_SETUP; 175 | src.regId = RSV_WRITE_STRIDE; 176 | newInstrs.append(genMove(sp, src)); 177 | newInstrs.append(genMove(sp, dmaStoreSetup)); 178 | sp.regId = SPECIAL_DMA_ST_ADDR; 179 | newInstrs.append(genMove(sp, instr.ST2.addr)); 180 | break; 181 | case RECV: { 182 | instr.tag = TMU0_TO_ACC4; 183 | newInstrs.append(instr); 184 | 185 | Instr move; 186 | move.tag = ALU; 187 | move.ALU.setFlags = false; 188 | move.ALU.cond.tag = ALWAYS; 189 | move.ALU.dest = instr.RECV.dest; 190 | move.ALU.srcA.tag = REG; 191 | move.ALU.srcA.reg.tag = ACC; 192 | move.ALU.srcA.reg.regId = 4; 193 | move.ALU.op = A_BOR; 194 | move.ALU.srcB.tag = REG; 195 | move.ALU.srcB.reg.tag = ACC; 196 | move.ALU.srcB.reg.regId = 4; 197 | newInstrs.append(move); 198 | break; 199 | } 200 | default: 201 | newInstrs.append(instr); 202 | break; 203 | } 204 | } 205 | 206 | // Update original instruction sequence 207 | instrs->clear(); 208 | for (int i = 0; i < newInstrs.numElems; i++) 209 | instrs->append(newInstrs.elems[i]); 210 | } 211 | -------------------------------------------------------------------------------- /Lib/Target/LoadStore.h: -------------------------------------------------------------------------------- 1 | #ifndef _LOADSTORE_H_ 2 | #define _LOADSTORE_H_ 3 | 4 | #include "Common/Seq.h" 5 | #include "Target/Syntax.h" 6 | 7 | void genSetReadStride(Seq* instrs, int stride); 8 | void genSetReadStride(Seq* instrs, Reg stride); 9 | void genSetWriteStride(Seq* instrs, int 
stride); 10 | void genSetWriteStride(Seq* instrs, Reg stride); 11 | void loadStorePass(Seq* instrs); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /Lib/Target/Pretty.cpp: -------------------------------------------------------------------------------- 1 | #include "Target/Syntax.h" 2 | #include "Target/SmallLiteral.h" 3 | 4 | void pretty(SubWord sw) 5 | { 6 | switch (sw) { 7 | case A8: printf("[7:0]"); return; 8 | case B8: printf("[15:8]"); return; 9 | case C8: printf("[23:16]"); return; 10 | case D8: printf("[31:24]"); return; 11 | case A16: printf("[15:0]"); return; 12 | case B16: printf("[31:16]"); return; 13 | } 14 | } 15 | 16 | const char* specialStr(RegId rid) 17 | { 18 | Special s = (Special) rid; 19 | switch (s) { 20 | case SPECIAL_UNIFORM: return "UNIFORM"; 21 | case SPECIAL_ELEM_NUM: return "ELEM_NUM"; 22 | case SPECIAL_QPU_NUM: return "QPU_NUM"; 23 | case SPECIAL_RD_SETUP: return "RD_SETUP"; 24 | case SPECIAL_WR_SETUP: return "WR_SETUP"; 25 | case SPECIAL_DMA_ST_ADDR: return "DMA_ST_ADDR"; 26 | case SPECIAL_DMA_LD_ADDR: return "DMA_LD_ADDR"; 27 | case SPECIAL_VPM_READ: return "VPM_READ"; 28 | case SPECIAL_VPM_WRITE: return "VPM_WRITE"; 29 | case SPECIAL_HOST_INT: return "HOST_INT"; 30 | case SPECIAL_TMU0_S: return "TMU0_S"; 31 | } 32 | 33 | // Unreachable 34 | assert(false); 35 | } 36 | 37 | void pretty(Reg r) 38 | { 39 | switch (r.tag) { 40 | case REG_A: 41 | printf("A%i", r.regId); 42 | return; 43 | case REG_B: printf("B%i", r.regId); return; 44 | case ACC: printf("ACC%i", r.regId); return; 45 | case SPECIAL: printf("S[%s]", specialStr(r.regId)); return; 46 | case NONE: printf("_"); return; 47 | } 48 | } 49 | 50 | void pretty(Flag flag) 51 | { 52 | switch (flag) { 53 | case ZS: printf("ZS"); return; 54 | case ZC: printf("ZC"); return; 55 | case NS: printf("NS"); return; 56 | case NC: printf("NC"); return; 57 | } 58 | } 59 | 60 | void pretty(BranchCond cond) 61 | { 62 | switch (cond.tag) { 63 | 
case COND_ALL: 64 | printf("all("); 65 | pretty(cond.flag); 66 | printf(")"); 67 | return; 68 | case COND_ANY: 69 | printf("any("); 70 | pretty(cond.flag); 71 | printf(")"); 72 | return; 73 | case COND_ALWAYS: 74 | printf("always"); 75 | return; 76 | case COND_NEVER: 77 | printf("never"); 78 | return; 79 | } 80 | } 81 | 82 | void pretty(AssignCond cond) 83 | { 84 | switch (cond.tag) { 85 | case ALWAYS: printf("always"); return; 86 | case NEVER: printf("never"); return; 87 | case FLAG: pretty(cond.flag); return; 88 | } 89 | } 90 | 91 | void pretty(Imm imm) { 92 | switch (imm.tag) { 93 | case IMM_INT32: 94 | printf("%d", imm.intVal); 95 | return; 96 | case IMM_FLOAT32: 97 | printf("%f", imm.floatVal); 98 | return; 99 | case IMM_MASK: 100 | int b = imm.intVal; 101 | for (int i = 0; i < 16; i++) { 102 | printf("%i", b&1 ? 1 : 0); 103 | b >>= 1; 104 | } 105 | return; 106 | } 107 | } 108 | 109 | void pretty(SmallImm imm) 110 | { 111 | switch (imm.tag) { 112 | case SMALL_IMM: printSmallLit(imm.val); return; 113 | case ROT_ACC: printf("ROT(ACC5)"); return; 114 | case ROT_IMM: printf("ROT(%i)", imm.val); return; 115 | } 116 | } 117 | 118 | void pretty(RegOrImm r) 119 | { 120 | switch (r.tag) { 121 | case REG: pretty(r.reg); return; 122 | case IMM: pretty(r.smallImm); return; 123 | } 124 | } 125 | 126 | void pretty(ALUOp op) 127 | { 128 | switch (op) { 129 | case NOP: printf("nop"); return; 130 | case A_FADD: printf("addf"); return; 131 | case A_FSUB: printf("subf"); return; 132 | case A_FMIN: printf("minf"); return; 133 | case A_FMAX: printf("maxf"); return; 134 | case A_FMINABS: printf("minabsf"); return; 135 | case A_FMAXABS: printf("maxabsf"); return; 136 | case A_FtoI: printf("ftoi"); return; 137 | case A_ItoF: printf("itof"); return; 138 | case A_ADD: printf("add"); return; 139 | case A_SUB: printf("sub"); return; 140 | case A_SHR: printf("shr"); return; 141 | case A_ASR: printf("asr"); return; 142 | case A_ROR: printf("ror"); return; 143 | case A_SHL: printf("shl"); 
return; 144 | case A_MIN: printf("min"); return; 145 | case A_MAX: printf("max"); return; 146 | case A_BAND: printf("and"); return; 147 | case A_BOR: printf("or"); return; 148 | case A_BXOR: printf("xor"); return; 149 | case A_BNOT: printf("not"); return; 150 | case A_CLZ: printf("clz"); return; 151 | case A_V8ADDS: printf("addsatb"); return; 152 | case A_V8SUBS: printf("subsatb"); return; 153 | case M_FMUL: printf("mulf"); return; 154 | case M_MUL24: printf("mul24"); return; 155 | case M_V8MUL: printf("mulb"); return; 156 | case M_V8MIN: printf("minb"); return; 157 | case M_V8MAX: printf("maxb"); return; 158 | case M_V8ADDS: printf("m_addsatb"); return; 159 | case M_V8SUBS: printf("m_subsatb"); return; 160 | case M_ROTATE: printf("rotate"); return; 161 | } 162 | } 163 | 164 | void pretty(BranchTarget target) 165 | { 166 | if (target.relative) 167 | printf("PC+1+"); 168 | if (target.useRegOffset) 169 | printf("A%i+", target.regOffset); 170 | printf("%i", target.immOffset); 171 | } 172 | 173 | void pretty(BufferAorB buffer) 174 | { 175 | if (buffer == A) printf("A"); 176 | if (buffer == B) printf("B"); 177 | } 178 | 179 | void pretty(Instr instr) 180 | { 181 | switch (instr.tag) { 182 | case LI: 183 | if (instr.LI.cond.tag != ALWAYS) { 184 | printf("where "); 185 | pretty(instr.LI.cond); 186 | printf(": "); 187 | } 188 | pretty(instr.LI.dest); 189 | printf(" <-%s ", instr.LI.setFlags ? "{sf}" : ""); 190 | pretty(instr.LI.imm); 191 | printf("\n"); 192 | return; 193 | case ALU: 194 | if (instr.ALU.cond.tag != ALWAYS) { 195 | printf("where "); 196 | pretty(instr.ALU.cond); 197 | printf(": "); 198 | } 199 | pretty(instr.ALU.dest); 200 | printf(" <-%s ", instr.ALU.setFlags ? 
"{sf}" : ""); 201 | pretty(instr.ALU.op); 202 | printf("("); 203 | pretty(instr.ALU.srcA); 204 | printf(", "); 205 | pretty(instr.ALU.srcB); 206 | printf(")\n"); 207 | return; 208 | case END: 209 | printf("END\n"); 210 | return; 211 | case BR: 212 | printf("if "); 213 | pretty(instr.BR.cond); 214 | printf(" goto "); 215 | pretty(instr.BR.target); 216 | printf("\n"); 217 | return; 218 | case BRL: 219 | printf("if "); 220 | pretty(instr.BRL.cond); 221 | printf(" goto L%i\n", instr.BRL.label); 222 | return; 223 | case LAB: 224 | printf("L%i:\n", instr.label); 225 | return; 226 | case NO_OP: 227 | printf("NOP\n"); 228 | return; 229 | case LD1: 230 | pretty(instr.LD1.buffer); 231 | printf(" <- LD1("); 232 | pretty(instr.LD1.addr); 233 | printf(")\n"); 234 | return; 235 | case LD2: 236 | printf("LD2\n"); 237 | return; 238 | case LD3: 239 | printf("LD3("); 240 | pretty(instr.LD3.buffer); 241 | printf(")\n"); 242 | return; 243 | case LD4: 244 | pretty(instr.LD4.dest); 245 | printf(" <- LD4\n"); 246 | return; 247 | case ST1: 248 | printf("ST1("); 249 | pretty(instr.ST1.buffer); 250 | printf(") <- "); 251 | pretty(instr.ST1.data); 252 | printf("\n"); 253 | return; 254 | case ST2: 255 | printf("ST2("); 256 | pretty(instr.ST2.buffer); 257 | printf(", "); 258 | pretty(instr.ST2.addr); 259 | printf(")\n"); 260 | return; 261 | case ST3: 262 | printf("ST3\n"); 263 | return; 264 | case PRS: 265 | printf("PRS(\"%s\")", instr.PRS); 266 | return; 267 | case PRI: 268 | printf("PRI("); 269 | pretty(instr.PRI); 270 | printf(")\n"); 271 | return; 272 | case PRF: 273 | printf("PRF("); 274 | pretty(instr.PRF); 275 | printf(")\n"); 276 | return; 277 | case RECV: 278 | printf("RECV("); 279 | pretty(instr.RECV.dest); 280 | printf(")\n"); 281 | return; 282 | case TMU0_TO_ACC4: 283 | printf("TMU0_TO_ACC4\n"); 284 | return; 285 | case SINC: 286 | printf("SINC %i\n", instr.semaId); 287 | return; 288 | case SDEC: 289 | printf("SDEC %i\n", instr.semaId); 290 | return; 291 | case IRQ: 292 | 
printf("IRQ\n"); 293 | return; 294 | } 295 | } 296 | -------------------------------------------------------------------------------- /Lib/Target/Pretty.h: -------------------------------------------------------------------------------- 1 | #ifndef _TARGET_PRETTY_H_ 2 | #define _TARGET_PRETTY_H_ 3 | 4 | #include "Target/Syntax.h" 5 | 6 | // Pretty printer for the QPULib target language 7 | void pretty(Instr instr); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /Lib/Target/ReachingDefs.cpp: -------------------------------------------------------------------------------- 1 | // Reaching definitions analysis 2 | 3 | #include "Source/Syntax.h" 4 | #include "Target/ReachingDefs.h" 5 | #include "Target/Liveness.h" 6 | 7 | // ============================================================================ 8 | // Compute 'defsOf' mapping 9 | // ============================================================================ 10 | 11 | // Compute a mapping from each register id to a set of instruction ids 12 | // that assign to that register. 13 | 14 | typedef Seq> DefsOf; 15 | 16 | void computeDefsOf(Seq* instrs, DefsOf* defsOf) 17 | { 18 | int numVars = getFreshVarCount(); 19 | defsOf->setCapacity(numVars); 20 | defsOf->numElems = numVars; 21 | 22 | for (int i = 0; i < instrs->numElems; i++) { 23 | UseDef set; 24 | useDef(instrs->elems[i], &set); 25 | for (int j = 0; j < set.def.numElems; j++) { 26 | RegId r = set.def.elems[j]; 27 | defsOf->elems[r].insert(i); 28 | } 29 | } 30 | } 31 | 32 | // ============================================================================ 33 | // Compute 'usesOf' mapping 34 | // ============================================================================ 35 | 36 | // Compute a mapping from each register id to a set of instruction ids 37 | // that use that register. 
38 | 39 | typedef Seq> UsesOf; 40 | 41 | void computeUsesOf(Seq* instrs, UsesOf* usesOf) 42 | { 43 | int numVars = getFreshVarCount(); 44 | usesOf->setCapacity(numVars); 45 | usesOf->numElems = numVars; 46 | 47 | for (int i = 0; i < instrs->numElems; i++) { 48 | UseDef set; 49 | useDef(instrs->elems[i], &set); 50 | for (int j = 0; j < set.use.numElems; j++) { 51 | RegId r = set.use.elems[j]; 52 | usesOf->elems[r].insert(i); 53 | } 54 | } 55 | } 56 | 57 | // ============================================================================ 58 | // Compute 'gen' and 'kill' sets 59 | // ============================================================================ 60 | 61 | // 'gen' set: an instruction labelled x 'generates' x if it 62 | // modifies any variable. 63 | // 'kill' set: an instruction labelled x that modifies a register y 64 | // kills all instructions that modify y, except x. 65 | 66 | struct GenKill { 67 | SmallSeq gen; 68 | SmallSeq kill; 69 | }; 70 | 71 | // Compute 'gen' and 'kill' sets for a given instruction 72 | 73 | void computeGenKill(InstrId id, Instr instr, DefsOf* defsOf, GenKill* genKill) 74 | { 75 | // Make the 'gen' and 'kill' sets empty 76 | genKill->gen.clear(); 77 | genKill->kill.clear(); 78 | 79 | // Does instruction modify a reg? 80 | bool isDef = false; 81 | 82 | // If so, which reg? 
83 | RegId defReg; 84 | 85 | switch (instr.tag) { 86 | // Load immediate 87 | case LI: 88 | // Add destination reg to 'def' set 89 | if (instr.LI.dest.tag == REG_A) { 90 | isDef = true; 91 | defReg = instr.LI.dest.regId; 92 | } 93 | break; 94 | 95 | // ALU operation 96 | case ALU: 97 | // Add destination reg to 'def' set 98 | if (instr.ALU.dest.tag == REG_A) { 99 | isDef = true; 100 | defReg = instr.ALU.dest.regId; 101 | } 102 | break; 103 | 104 | // LD4 instruction 105 | case LD4: 106 | // Add dest reg to 'def' set 107 | if (instr.LD4.dest.tag == REG_A) { 108 | isDef = true; 109 | defReg = instr.LD4.dest.regId; 110 | } 111 | break; 112 | } 113 | 114 | if (isDef) { 115 | genKill->gen.insert(id); 116 | SmallSeq* defs = &defsOf->elems[defReg]; 117 | for (int i = 0; i < defs->numElems; i++) 118 | if (defs->elems[i] != id) 119 | genKill->kill.insert(defs->elems[i]); 120 | } 121 | } 122 | 123 | // ============================================================================ 124 | // Compute live reaching definitions for each instruction 125 | // ============================================================================ 126 | 127 | // Helper function: given the reaching-out definitions, compute the 128 | // reaching-in set for a given instruction. For efficiency reasons, 129 | // we only return live definitions that reach-in, but on the down-side 130 | // this means we have to perform liveness analysis first. 
131 | 132 | void computeReachIn(Seq* instrs, CFG* preds, Liveness* live, 133 | ReachingDefs* defs, InstrId i, ReachSet* reachIn) 134 | { 135 | LiveSet* liveIn = &live->elems[i]; 136 | reachIn->clear(); 137 | Succs* p = &preds->elems[i]; 138 | for (int j = 0; j < p->numElems; j++) { 139 | ReachSet* set = &defs->elems[p->elems[j]]; 140 | for (int k = 0; k < set->numElems; k++) { 141 | InstrId d = set->elems[k]; 142 | // Compute vars defined by instruction 143 | UseDef useDefSet; 144 | useDef(instrs->elems[d], &useDefSet); 145 | // Only add live definitions to the set 146 | for (int n = 0; n < useDefSet.def.numElems; n++) { 147 | if (liveIn->member(useDefSet.def.elems[n])) { 148 | reachIn->insert(d); 149 | break; 150 | } 151 | } 152 | } 153 | } 154 | } 155 | 156 | void reachingOutDefs(Seq* instrs, Liveness* live, 157 | CFG* preds, ReachingDefs* defs) 158 | { 159 | // Make sure defs is large enough 160 | defs->setCapacity(instrs->numElems); 161 | defs->numElems = instrs->numElems; 162 | 163 | // Find all definitions of each register 164 | DefsOf defsOf; 165 | computeDefsOf(instrs, &defsOf); 166 | 167 | // For storing the 'gen' and 'kill' sets of each instruction 168 | GenKill genKillSets; 169 | 170 | // For temporarily storing reaching-in and reaching-out definitions 171 | ReachSet reachIn; 172 | ReachSet reachOut; 173 | 174 | // Has a change been made to the reaching-definitions mapping? 175 | bool changed = true; 176 | 177 | // Iterate until no change, i.e. 
fixed point 178 | while (changed) { 179 | changed = false; 180 | 181 | // Propagate reaching definitions forward 182 | for (int i = 0; i < instrs->numElems; i++) { 183 | // Compute 'gen' and 'kill' sets 184 | Instr instr = instrs->elems[i]; 185 | computeGenKill(i, instr, &defsOf, &genKillSets); 186 | 187 | // Compute reaching-in definitions 188 | computeReachIn(instrs, preds, live, defs, i, &reachIn); 189 | 190 | // Remove the 'kill' set from the reach-in set to give reach-out set 191 | reachOut.clear(); 192 | for (int j = 0; j < reachIn.numElems; j++) { 193 | if (! genKillSets.kill.member(reachIn.elems[j])) 194 | reachOut.insert(reachIn.elems[j]); 195 | } 196 | 197 | // Add the 'gen' set to the reach-out set 198 | for (int j = 0; j < genKillSets.gen.numElems; j++) 199 | reachOut.insert(genKillSets.gen.elems[j]); 200 | 201 | // Insert the reach-out variables into the map 202 | for (int j = 0; j < reachOut.numElems; j++) { 203 | bool inserted = defs->elems[i].insert(reachOut.elems[j]); 204 | changed = changed || inserted; 205 | } 206 | } 207 | } 208 | } 209 | 210 | void reachingDefs(Seq* instrs, CFG* cfg, ReachingDefs* defs) 211 | { 212 | // For efficiency, perform liveness analysis first 213 | Liveness live; 214 | liveness(instrs, cfg, &live); 215 | 216 | // Reverse the arrows in the CFG 217 | CFG preds; 218 | reverseCFG(cfg, &preds); 219 | 220 | // Make sure defs is large enough 221 | defs->setCapacity(instrs->numElems); 222 | defs->numElems = instrs->numElems; 223 | 224 | // Compute defs reaching-out of each instruction 225 | ReachingDefs out; 226 | reachingOutDefs(instrs, &live, &preds, &out); 227 | 228 | // Compute defs reaching-in to each instruction 229 | for (int i = 0; i < defs->numElems; i++) 230 | computeReachIn(instrs, &preds, &live, &out, i, &defs->elems[i]); 231 | } 232 | 233 | // ============================================================================ 234 | // Compute instructions reached-by each definition 235 | // 
============================================================================ 236 | 237 | void computeReachedBy(Seq* instrs, CFG* cfg, ReachingDefs* reachedBy) 238 | { 239 | // Make sure reachedBy is large enough 240 | reachedBy->setCapacity(instrs->numElems); 241 | reachedBy->numElems = instrs->numElems; 242 | 243 | // Find all uses of each register 244 | UsesOf usesOf; 245 | computeUsesOf(instrs, &usesOf); 246 | 247 | // Compute definitions reaching each instruction 248 | ReachingDefs defs; 249 | reachingDefs(instrs, cfg, &defs); 250 | 251 | for (int i = 0; i < instrs->numElems; i++) { 252 | // Compute def set 253 | UseDef useDefSet; 254 | useDef(instrs->elems[i], &useDefSet); 255 | 256 | for (int j = 0; j < useDefSet.def.numElems; j++) { 257 | RegId r = useDefSet.def.elems[j]; 258 | SmallSeq* uses = &usesOf.elems[r]; 259 | for (int k = 0; k < uses->numElems; k++) { 260 | InstrId u = uses->elems[k]; 261 | if (defs.elems[u].member(i)) 262 | reachedBy->elems[i].insert(u); 263 | } 264 | } 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /Lib/Target/ReachingDefs.h: -------------------------------------------------------------------------------- 1 | // Reaching definitions analysis 2 | 3 | #ifndef _REACHINGDEFS_H_ 4 | #define _REACHINGDEFS_H_ 5 | 6 | #include "Common/Seq.h" 7 | #include "Target/Syntax.h" 8 | #include "Target/CFG.h" 9 | 10 | // A reach set containts the instruction ids 11 | // that reach an instruction. 12 | 13 | typedef SmallSeq ReachSet; 14 | 15 | // The result of the analysis is a set of 16 | // instruction ids that reach each instruction. 17 | 18 | typedef Seq ReachingDefs; 19 | 20 | // Determine the live definitions reaching each instruction. 21 | 22 | void reachingDefs(Seq* instrs, CFG* cfg, ReachingDefs* defs); 23 | 24 | // Determine the instructions reached-by each definition. 
25 | 26 | void computeReachedBy(Seq* instrs, CFG* cfg, ReachingDefs* reachedBy); 27 | 28 | // Compute a mapping from each register id to a set of instruction ids 29 | // that assign to that register. 30 | typedef Seq DefsOf; 31 | void computeDefsOf(Seq* instrs, DefsOf* defsOf); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /Lib/Target/RegAlloc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Source/Syntax.h" 3 | #include "Target/Syntax.h" 4 | #include "Target/RegAlloc.h" 5 | #include "Target/Subst.h" 6 | #include "Target/Liveness.h" 7 | 8 | // ============================================================================ 9 | // Accumulator allocation 10 | // ============================================================================ 11 | 12 | // This is a simple peephole optimisation, captured by the following 13 | // rewrite rule: 14 | // 15 | // i: x <- f(...) 16 | // j: g(..., x, ...) 17 | // 18 | // ===> if x not live-out of j 19 | // 20 | // i: acc <- f(...) 21 | // j: g(..., acc, ...) 
22 | 23 | void introduceAccum(CFG* cfg, Liveness* live, Seq* instrs) 24 | { 25 | UseDef useDefPrev, useDefCurrent; 26 | LiveSet liveOut; 27 | 28 | Reg acc; 29 | acc.tag = ACC; 30 | acc.regId = 1; 31 | 32 | for (int i = 1; i < instrs->numElems; i++) { 33 | Instr prev = instrs->elems[i-1]; 34 | Instr instr = instrs->elems[i]; 35 | 36 | // Compute vars defined by prev 37 | useDef(prev, &useDefPrev); 38 | 39 | if (useDefPrev.def.numElems > 0) { 40 | RegId def = useDefPrev.def.elems[0]; 41 | 42 | // Compute vars used by instr 43 | useDef(instr, &useDefCurrent); 44 | 45 | // Compute vars live-out of instr 46 | computeLiveOut(cfg, live, i, &liveOut); 47 | 48 | // Check that write is non-conditional 49 | bool always = (prev.tag == LI && prev.LI.cond.tag == ALWAYS) 50 | || (prev.tag == ALU && prev.ALU.cond.tag == ALWAYS); 51 | 52 | if (always && 53 | useDefCurrent.use.member(def) && 54 | !liveOut.member(def)) { 55 | renameDest(&prev, REG_A, def, ACC, 1); 56 | renameUses(&instr, REG_A, def, ACC, 1); 57 | instrs->elems[i-1] = prev; 58 | instrs->elems[i] = instr; 59 | } 60 | } 61 | } 62 | } 63 | 64 | // ============================================================================ 65 | // Register allocation 66 | // ============================================================================ 67 | 68 | void regAlloc(CFG* cfg, Seq* instrs) 69 | { 70 | // Step 0 71 | // Perform liveness analysis 72 | Liveness live; 73 | liveness(instrs, cfg, &live); 74 | 75 | // Optimisation pass that introduces accumulators 76 | introduceAccum(cfg, &live, instrs); 77 | 78 | // Step 1 79 | // For each variable, determine a preference for register file A or B. 
80 | int n = getFreshVarCount(); 81 | int* prefA = new int [n]; 82 | int* prefB = new int [n]; 83 | UseDef useDefSet; 84 | for (int i = 0; i < n; i++) prefA[i] = prefB[i] = 0; 85 | 86 | for (int i = 0; i < instrs->numElems; i++) { 87 | Instr instr = instrs->elems[i]; 88 | Reg ra, rb; 89 | if (getTwoUses(instr, &ra, &rb) && ra.tag == REG_A && rb.tag == REG_A) { 90 | RegId x = ra.regId; 91 | RegId y = rb.regId; 92 | if (prefA[x] > prefA[y] || prefB[y] > prefB[x]) 93 | { prefA[x]++; prefB[y]++; } 94 | else 95 | { prefA[y]++; prefB[x]++; } 96 | } 97 | else if (instr.tag == ALU && 98 | instr.ALU.srcA.tag == REG && 99 | instr.ALU.srcA.reg.tag == REG_A && 100 | instr.ALU.srcB.tag == IMM) { 101 | prefA[instr.ALU.srcA.reg.regId]++; 102 | } 103 | else if (instr.tag == ALU && 104 | instr.ALU.srcB.tag == REG && 105 | instr.ALU.srcB.reg.tag == REG_A && 106 | instr.ALU.srcA.tag == IMM) { 107 | prefA[instr.ALU.srcB.reg.regId]++; 108 | } 109 | } 110 | 111 | // Step 2 112 | // For each variable, determine all variables ever live at same time 113 | LiveSet* liveWith = new LiveSet [n]; 114 | LiveSet liveOut; 115 | for (int i = 0; i < instrs->numElems; i++) { 116 | computeLiveOut(cfg, &live, i, &liveOut); 117 | useDef(instrs->elems[i], &useDefSet); 118 | for (int j = 0; j < liveOut.numElems; j++) { 119 | RegId rx = liveOut.elems[j]; 120 | for (int k = 0; k < liveOut.numElems; k++) { 121 | RegId ry = liveOut.elems[k]; 122 | if (rx != ry) liveWith[rx].insert(ry); 123 | } 124 | for (int k = 0; k < useDefSet.def.numElems; k++) { 125 | RegId rd = useDefSet.def.elems[k]; 126 | if (rd != rx) { 127 | liveWith[rx].insert(rd); 128 | liveWith[rd].insert(rx); 129 | } 130 | } 131 | } 132 | } 133 | 134 | // Step 3 135 | // Allocate a register to each variable 136 | RegTag prevChosenRegFile = REG_B; 137 | Reg* alloc = new Reg [n]; 138 | for (int i = 0; i < n; i++) alloc[i].tag = NONE; 139 | 140 | const int NUM_REGS = 32; 141 | bool possibleA[NUM_REGS]; 142 | bool possibleB[NUM_REGS]; 143 | 144 | for 
(int i = 0; i < n; i++) { 145 | for (int j = 0; j < NUM_REGS; j++) 146 | possibleA[j] = possibleB[j] = true; 147 | 148 | // Eliminate impossible choices of register for this variable 149 | LiveSet* set = &liveWith[i]; 150 | for (int j = 0; j < set->numElems; j++) { 151 | Reg neighbour = alloc[set->elems[j]]; 152 | if (neighbour.tag == REG_A) possibleA[neighbour.regId] = false; 153 | if (neighbour.tag == REG_B) possibleB[neighbour.regId] = false; 154 | } 155 | 156 | // Find possible register in each register file 157 | RegId chosenA = -1; 158 | RegId chosenB = -1; 159 | for (int j = 0; j < NUM_REGS; j++) 160 | if (possibleA[j]) { chosenA = j; break; } 161 | for (int j = 0; j < NUM_REGS; j++) 162 | if (possibleB[j]) { chosenB = j; break; } 163 | 164 | // Choose a register file 165 | RegTag chosenRegFile; 166 | if (chosenA < 0 && chosenB < 0) { 167 | printf("QPULib: register allocation failed, insufficient capacity\n"); 168 | exit(EXIT_FAILURE); 169 | } 170 | else if (chosenA < 0) chosenRegFile = REG_B; 171 | else if (chosenB < 0) chosenRegFile = REG_A; 172 | else { 173 | if (prefA[i] > prefB[i]) chosenRegFile = REG_A; 174 | else if (prefA[i] < prefB[i]) chosenRegFile = REG_B; 175 | else chosenRegFile = prevChosenRegFile == REG_A ? REG_B : REG_A; 176 | } 177 | prevChosenRegFile = chosenRegFile; 178 | 179 | // Finally, allocate a register to the variable 180 | alloc[i].tag = chosenRegFile; 181 | alloc[i].regId = chosenRegFile == REG_A ? chosenA : chosenB; 182 | } 183 | 184 | // Step 4 185 | // Apply the allocation to the code 186 | for (int i = 0; i < instrs->numElems; i++) { 187 | useDef(instrs->elems[i], &useDefSet); 188 | Instr* instr = &instrs->elems[i]; 189 | for (int j = 0; j < useDefSet.def.numElems; j++) { 190 | RegId r = useDefSet.def.elems[j]; 191 | RegTag tmp = alloc[r].tag == REG_A ? 
TMP_A : TMP_B; 192 | renameDest(instr, REG_A, r, tmp, alloc[r].regId); 193 | } 194 | for (int j = 0; j < useDefSet.use.numElems; j++) { 195 | RegId r = useDefSet.use.elems[j]; 196 | RegTag tmp = alloc[r].tag == REG_A ? TMP_A : TMP_B; 197 | renameUses(instr, REG_A, r, tmp, alloc[r].regId); 198 | } 199 | substRegTag(instr, TMP_A, REG_A); 200 | substRegTag(instr, TMP_B, REG_B); 201 | } 202 | 203 | // Free memory 204 | delete [] prefA; 205 | delete [] prefB; 206 | delete [] liveWith; 207 | } 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /Lib/Target/RegAlloc.h: -------------------------------------------------------------------------------- 1 | #ifndef _REGALLOC_H_ 2 | #define _REGALLOC_H_ 3 | 4 | #include "Target/CFG.h" 5 | #include "Target/Liveness.h" 6 | #include "Target/Syntax.h" 7 | #include "Common/Seq.h" 8 | 9 | void regAlloc(CFG* cfg, Seq* instrs); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /Lib/Target/RemoveLabels.cpp: -------------------------------------------------------------------------------- 1 | #include "Target/RemoveLabels.h" 2 | 3 | // ============================================================================ 4 | // Remove labels 5 | // ============================================================================ 6 | 7 | // Remove all labels, replacing absolute branch-label instructions 8 | // with relative branch-target instructions. 9 | 10 | void removeLabels(Seq* instrs) 11 | { 12 | Seq newInstrs; 13 | 14 | // The number of labels in the instruction sequence 15 | int numLabels = getFreshLabelCount(); 16 | 17 | // A mapping from labels to instruction ids 18 | InstrId* labels = new InstrId [numLabels]; 19 | 20 | // Initialise label mapping 21 | for (int i = 0; i < numLabels; i++) 22 | labels[i] = -1; 23 | 24 | // First, remove labels, remembering the index of the instruction 25 | // pointed to by each label. 
26 | for (int i = 0, j = 0; i < instrs->numElems; i++) { 27 | Instr instr = instrs->elems[i]; 28 | if (instr.tag == LAB) { 29 | labels[instr.label] = j; 30 | } 31 | else { 32 | newInstrs.append(instr); 33 | j++; 34 | } 35 | } 36 | 37 | // Second, remove branch-label instructions. 38 | instrs->numElems = newInstrs.numElems; 39 | for (int i = 0; i < newInstrs.numElems; i++) { 40 | Instr instr = newInstrs.elems[i]; 41 | if (instr.tag == BRL) { 42 | assert(instr.BRL.label >= 0 && instr.BRL.label < numLabels); 43 | int dest = labels[instr.BRL.label]; 44 | assert (dest >= 0); 45 | BranchTarget t; 46 | t.relative = true; 47 | t.useRegOffset = false; 48 | t.immOffset = dest - 4 - i; 49 | instr.tag = BR; 50 | instr.BR.target = t; 51 | instrs->elems[i] = instr; 52 | } 53 | else { 54 | instrs->elems[i] = instr; 55 | } 56 | } 57 | 58 | delete [] labels; 59 | } 60 | -------------------------------------------------------------------------------- /Lib/Target/RemoveLabels.h: -------------------------------------------------------------------------------- 1 | #ifndef _REMOVELABELS_H_ 2 | #define _REMOVELABELS_H_ 3 | 4 | #include "Target/Syntax.h" 5 | #include "Target/CFG.h" 6 | #include "Target/Liveness.h" 7 | #include "Common/Seq.h" 8 | 9 | // Remove all labels, replacing absolute branch-label instructions 10 | // with relative branch-target instructions. 
11 | void removeLabels(Seq* instrs); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /Lib/Target/Satisfy.cpp: -------------------------------------------------------------------------------- 1 | #include "Target/Satisfy.h" 2 | #include "Target/Liveness.h" 3 | #include "Target/RegAlloc.h" 4 | #include 5 | #include 6 | 7 | // ============================= 8 | // Remap register to accumulator 9 | // ============================= 10 | 11 | // Return an instruction to move the contents of a register to an 12 | // accumulator, and change the use of that register in the given 13 | // instruction to the given accumulator. 14 | 15 | Instr remapAToAccum(Instr* instr, RegId acc) 16 | { 17 | assert(instr->ALU.srcA.tag == REG); 18 | 19 | AssignCond always; 20 | always.tag = ALWAYS; 21 | 22 | Instr move; 23 | move.tag = ALU; 24 | move.ALU.setFlags = false; 25 | move.ALU.cond = always; 26 | move.ALU.dest.tag = ACC; 27 | move.ALU.dest.regId = acc; 28 | move.ALU.srcA.tag = REG; 29 | move.ALU.srcA.reg = instr->ALU.srcA.reg; 30 | move.ALU.op = A_BOR; 31 | move.ALU.srcB.tag = REG; 32 | move.ALU.srcB.reg = instr->ALU.srcA.reg; 33 | 34 | instr->ALU.srcA.reg.tag = ACC; 35 | instr->ALU.srcA.reg.regId = acc; 36 | 37 | return move; 38 | } 39 | 40 | Instr remapBToAccum(Instr* instr, RegId acc) 41 | { 42 | assert(instr->ALU.srcB.tag == REG); 43 | 44 | AssignCond always; 45 | always.tag = ALWAYS; 46 | 47 | Instr move; 48 | move.tag = ALU; 49 | move.ALU.setFlags = false; 50 | move.ALU.cond = always; 51 | move.ALU.dest.tag = ACC; 52 | move.ALU.dest.regId = acc; 53 | move.ALU.srcA.tag = REG; 54 | move.ALU.srcA.reg = instr->ALU.srcB.reg; 55 | move.ALU.op = A_BOR; 56 | move.ALU.srcB.tag = REG; 57 | move.ALU.srcB.reg = instr->ALU.srcB.reg; 58 | 59 | instr->ALU.srcB.reg.tag = ACC; 60 | instr->ALU.srcB.reg.regId = acc; 61 | 62 | return move; 63 | } 64 | 65 | // ============================== 66 | // Resolve register file conflict 67 | // 
============================== 68 | 69 | // Determine reg file of given register. 70 | 71 | RegTag regFileOf(Reg r) 72 | { 73 | if (r.tag == REG_A) return REG_A; 74 | if (r.tag == REG_B) return REG_B; 75 | if (r.tag == SPECIAL) { 76 | if (r.regId == SPECIAL_ELEM_NUM) return REG_A; 77 | if (r.regId == SPECIAL_QPU_NUM) return REG_B; 78 | if (r.regId == SPECIAL_DMA_LD_WAIT) return REG_A; 79 | if (r.regId == SPECIAL_DMA_ST_WAIT) return REG_B; 80 | } 81 | return NONE; 82 | } 83 | 84 | // When an instruction uses two (different) registers that are mapped 85 | // to the same register file, then remap one of them to an 86 | // accumulator. 87 | 88 | bool resolveRegFileConflict(Instr* instr, Instr* newInstr) 89 | { 90 | if (instr->tag == ALU && instr->ALU.srcA.tag == REG 91 | && instr->ALU.srcB.tag == REG) { 92 | int rfa = regFileOf(instr->ALU.srcA.reg); 93 | int rfb = regFileOf(instr->ALU.srcB.reg); 94 | if (rfa != NONE && rfb != NONE) { 95 | bool conflict = rfa == rfb && instr->ALU.srcA.reg.regId != 96 | instr->ALU.srcB.reg.regId; 97 | if (conflict) { 98 | *newInstr = remapAToAccum(instr, 0); 99 | return true; 100 | } 101 | } 102 | } 103 | return false; 104 | } 105 | 106 | // ============================= 107 | // Satisfy VideoCore constraints 108 | // ============================= 109 | 110 | // Transform an instruction sequence to satisfy various VideoCore 111 | // constraints, including: 112 | // 113 | // 1. fill branch delay slots with NOPs; 114 | // 115 | // 2. introduce accumulators for operands mapped to the same 116 | // register file; 117 | // 118 | // 3. introduce accumulators for horizontal rotation operands; 119 | // 120 | // 4. insert NOPs to account for data hazards: a destination 121 | // register (assuming it's not an accumulator) cannot be read by 122 | // the next instruction. 123 | 124 | // First pass: insert move-to-accumulator instructions. 
125 | 126 | static void insertMoves(Seq* instrs, Seq* newInstrs) 127 | { 128 | for (int i = 0; i < instrs->numElems; i++) { 129 | Instr instr = instrs->elems[i]; 130 | RegId r; RegTag rt; 131 | 132 | if (instr.tag == ALU && instr.ALU.op == M_ROTATE) { 133 | // Insert moves for horizontal rotate operations 134 | newInstrs->append(remapAToAccum(&instr, 0)); 135 | if (instr.ALU.srcB.tag == REG) 136 | newInstrs->append(remapBToAccum(&instr, 5)); 137 | newInstrs->append(nop()); 138 | } 139 | else if (instr.tag == ALU && instr.ALU.srcA.tag == IMM && 140 | instr.ALU.srcB.tag == REG && 141 | regFileOf(instr.ALU.srcB.reg) == REG_B) { 142 | // Insert moves for an operation with a small immediate whose 143 | // register operand must reside in reg file B. 144 | newInstrs->append(remapBToAccum(&instr, 0)); 145 | } 146 | else if (instr.tag == ALU && instr.ALU.srcB.tag == IMM && 147 | instr.ALU.srcA.tag == REG && 148 | regFileOf(instr.ALU.srcA.reg) == REG_B) { 149 | // Insert moves for an operation with a small immediate whose 150 | // register operand must reside in reg file B. 
151 | newInstrs->append(remapAToAccum(&instr, 0)); 152 | } 153 | else { 154 | // Insert moves for operands that are mapped to the same reg file 155 | Instr move; 156 | if (resolveRegFileConflict(&instr, &move)) 157 | newInstrs->append(move); 158 | } 159 | 160 | // Put current instruction into the new sequence 161 | newInstrs->append(instr); 162 | } 163 | } 164 | 165 | // Second pass: insert NOPs 166 | static void insertNops(Seq* instrs, Seq* newInstrs) 167 | { 168 | // Use/def sets 169 | UseDefReg mySet, prevSet; 170 | 171 | // Previous instruction 172 | Instr prev = nop(); 173 | 174 | for (int i = 0; i < instrs->numElems; i++) { 175 | Instr instr = instrs->elems[i]; 176 | RegId r; RegTag rt; 177 | 178 | // Insert NOPs to avoid data hazards 179 | useDefReg(prev, &prevSet); 180 | useDefReg(instr, &mySet); 181 | for (int j = 0; j < prevSet.def.numElems; j++) { 182 | Reg defReg = prevSet.def.elems[j]; 183 | bool needNop = defReg.tag == REG_A || defReg.tag == REG_B; 184 | if (needNop && mySet.use.member(defReg)) { 185 | newInstrs->append(nop()); 186 | break; 187 | } 188 | } 189 | 190 | // Put current instruction into the new sequence 191 | newInstrs->append(instr); 192 | 193 | // Insert NOPs in branch delay slots 194 | if (instr.tag == BRL || instr.tag == END) { 195 | for (int j = 0; j < 3; j++) 196 | newInstrs->append(nop()); 197 | prev = nop(); 198 | } 199 | 200 | // Update previous instruction 201 | if (instr.tag != LAB) prev = instr; 202 | } 203 | 204 | } 205 | 206 | // Combine passes 207 | 208 | void satisfy(Seq* instrs) 209 | { 210 | // New instruction sequence 211 | Seq newInstrs(instrs->numElems * 2); 212 | 213 | // Apply passes 214 | insertMoves(instrs, &newInstrs); 215 | instrs->clear(); 216 | insertNops(&newInstrs, instrs); 217 | } 218 | -------------------------------------------------------------------------------- /Lib/Target/Satisfy.h: -------------------------------------------------------------------------------- 1 | #ifndef _SATISFY_H_ 2 | #define 
_SATISFY_H_ 3 | 4 | #include "Target/Syntax.h" 5 | #include "Target/CFG.h" 6 | 7 | RegTag regFileOf(Reg r); 8 | void satisfy(Seq* instrs); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /Lib/Target/SmallLiteral.cpp: -------------------------------------------------------------------------------- 1 | #include "Target/SmallLiteral.h" 2 | #include 3 | 4 | // Small literals are literals that fit in the small immediate field 5 | // of the VideoCore-IV instruction set. 6 | 7 | const int NUM_SMALL_FLOATS = 16; 8 | const float smallFloats[NUM_SMALL_FLOATS] = { 9 | 1.0 10 | , 2.0 11 | , 4.0 12 | , 8.0 13 | , 16.0 14 | , 32.0 15 | , 64.0 16 | , 128.0 17 | , 0.00390625 18 | , 0.0078125 19 | , 0.015625 20 | , 0.03125 21 | , 0.0625 22 | , 0.125 23 | , 0.25 24 | , 0.5 25 | }; 26 | 27 | // Encode a small literal according to Table 5 of the VideoCore-IV 28 | // manual. Returns -1 if expression cannot be encoded as a small 29 | // literal. 30 | 31 | int encodeSmallLit(Expr* e) 32 | { 33 | if (e->tag == INT_LIT) { 34 | if (e->intLit >= 0 && e->intLit <= 15) 35 | return e->intLit; 36 | else if (e->intLit >= -16 && e->intLit <= -1) 37 | return 32 + e->intLit; 38 | } 39 | else if (e->tag == FLOAT_LIT) { 40 | if (e->floatLit == 0.0) 41 | return 0; 42 | else { 43 | int index = -1; 44 | for (int i = 0; i < NUM_SMALL_FLOATS; i++) 45 | if (smallFloats[i] == e->floatLit) { 46 | index = i; 47 | break; 48 | } 49 | if (index != -1) 50 | return 32 + index; 51 | } 52 | } 53 | return -1; 54 | } 55 | 56 | // Determine if a given expression (source language) can be stored in 57 | // a small immediate. 58 | 59 | bool isSmallLit(Expr* e) 60 | { 61 | return encodeSmallLit(e) >= 0; 62 | } 63 | 64 | // Decode a small literal. 
65 | 66 | Word decodeSmallLit(int x) 67 | { 68 | Word w; 69 | if (x >= 32) { 70 | w.floatVal = smallFloats[x-32]; 71 | return w; 72 | } 73 | else if (x >= 16) { 74 | w.intVal = x-32; 75 | return w; 76 | } 77 | else if (x >= 0) { 78 | w.intVal = x; 79 | return w; 80 | } 81 | 82 | // Unreachable 83 | assert(false); 84 | } 85 | 86 | // Display a small literal. 87 | 88 | void printSmallLit(int x) 89 | { 90 | if (x >= 32) 91 | printf("%f", smallFloats[x-32]); 92 | else if (x >= 16) 93 | printf("%i", x-32); 94 | else if (x >= 0) 95 | printf("%i", x); 96 | } 97 | -------------------------------------------------------------------------------- /Lib/Target/SmallLiteral.h: -------------------------------------------------------------------------------- 1 | #ifndef _SMALL_LITERAL_H_ 2 | #define _SMALL_LITERAL_H_ 3 | 4 | #include "Source/Syntax.h" 5 | #include "Target/Emulator.h" 6 | 7 | int encodeSmallLit(Expr* e); 8 | bool isSmallLit(Expr* e); 9 | void printSmallLit(int x); 10 | Word decodeSmallLit(int x); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /Lib/Target/Subst.cpp: -------------------------------------------------------------------------------- 1 | #include "Target/Subst.h" 2 | 3 | // Rename a destination register in an instruction 4 | void renameDest(Instr* instr, RegTag vt, RegId v, 5 | RegTag wt, RegId w) 6 | { 7 | switch (instr->tag) { 8 | // Load immediate 9 | case LI: 10 | if (instr->LI.dest.tag == vt && instr->LI.dest.regId == v) { 11 | instr->LI.dest.tag = wt; 12 | instr->LI.dest.regId = w; 13 | } 14 | return; 15 | 16 | // ALU operation 17 | case ALU: 18 | if (instr->ALU.dest.tag == vt && instr->ALU.dest.regId == v) { 19 | instr->ALU.dest.tag = wt; 20 | instr->ALU.dest.regId = w; 21 | } 22 | return; 23 | 24 | // LD4 instruction 25 | case LD4: 26 | if (instr->LD4.dest.tag == vt && instr->LD4.dest.regId == v) { 27 | instr->LD4.dest.tag = wt; 28 | instr->LD4.dest.regId = w; 29 | } 30 | return; 31 | 32 | 
// RECV instruction 33 | case RECV: 34 | if (instr->RECV.dest.tag == vt && instr->RECV.dest.regId == v) { 35 | instr->RECV.dest.tag = wt; 36 | instr->RECV.dest.regId = w; 37 | } 38 | return; 39 | } 40 | } 41 | 42 | // Renamed a used register in an instruction 43 | void renameUses(Instr* instr, RegTag vt, RegId v, 44 | RegTag wt, RegId w) 45 | { 46 | switch (instr->tag) { 47 | // ALU operation 48 | case ALU: 49 | if (instr->ALU.srcA.tag == REG && instr->ALU.srcA.reg.tag == vt && 50 | instr->ALU.srcA.reg.regId == v) { 51 | instr->ALU.srcA.reg.tag = wt; 52 | instr->ALU.srcA.reg.regId = w; 53 | } 54 | 55 | if (instr->ALU.srcB.tag == REG && instr->ALU.srcB.reg.tag == vt && 56 | instr->ALU.srcB.reg.regId == v) { 57 | instr->ALU.srcB.reg.tag = wt; 58 | instr->ALU.srcB.reg.regId = w; 59 | } 60 | return; 61 | 62 | // LD1 instruction 63 | case LD1: 64 | if (instr->LD1.addr.tag == vt && instr->LD1.addr.regId == v) { 65 | instr->LD1.addr.tag = wt; 66 | instr->LD1.addr.regId = w; 67 | } 68 | return; 69 | 70 | // ST1 instruction 71 | case ST1: 72 | if (instr->ST1.data.tag == vt && instr->ST1.data.regId == v) { 73 | instr->ST1.data.tag = wt; 74 | instr->ST1.data.regId = w; 75 | } 76 | return; 77 | 78 | // ST2 instruction 79 | case ST2: 80 | if (instr->ST2.addr.tag == vt && instr->ST2.addr.regId == v) { 81 | instr->ST2.addr.tag = wt; 82 | instr->ST2.addr.regId = w; 83 | } 84 | return; 85 | 86 | // Print integer instruction 87 | case PRI: 88 | if (instr->PRI.tag == vt && instr->PRI.regId == v) { 89 | instr->PRI.tag = wt; 90 | instr->PRI.regId = w; 91 | } 92 | return; 93 | 94 | // Print float instruction 95 | case PRF: 96 | if (instr->PRF.tag == vt && instr->PRF.regId == v) { 97 | instr->PRF.tag = wt; 98 | instr->PRF.regId = w; 99 | } 100 | return; 101 | } 102 | } 103 | 104 | // Globally change register tag vt to wt in given instruction 105 | void substRegTag(Instr* instr, RegTag vt, RegTag wt) 106 | { 107 | switch (instr->tag) { 108 | // Load immediate 109 | case LI: 110 | if 
(instr->LI.dest.tag == vt) 111 | instr->LI.dest.tag = wt; 112 | return; 113 | 114 | // ALU operation 115 | case ALU: 116 | if (instr->ALU.dest.tag == vt) 117 | instr->ALU.dest.tag = wt; 118 | if (instr->ALU.srcA.tag == REG && instr->ALU.srcA.reg.tag == vt) 119 | instr->ALU.srcA.reg.tag = wt; 120 | if (instr->ALU.srcB.tag == REG && instr->ALU.srcB.reg.tag == vt) 121 | instr->ALU.srcB.reg.tag = wt; 122 | return; 123 | 124 | // LD1 instruction 125 | case LD1: 126 | if (instr->LD1.addr.tag == vt) 127 | instr->LD1.addr.tag = wt; 128 | return; 129 | 130 | // LD4 instruction 131 | case LD4: 132 | if (instr->LD4.dest.tag == vt) 133 | instr->LD4.dest.tag = wt; 134 | return; 135 | 136 | // ST1 instruction 137 | case ST1: 138 | if (instr->ST1.data.tag == vt) 139 | instr->ST1.data.tag = wt; 140 | return; 141 | 142 | // ST2 instruction 143 | case ST2: 144 | if (instr->ST2.addr.tag == vt) 145 | instr->ST2.addr.tag = wt; 146 | return; 147 | 148 | // Print integer instruction 149 | case PRI: 150 | if (instr->PRI.tag == vt) 151 | instr->PRI.tag = wt; 152 | return; 153 | 154 | // Print float instruction 155 | case PRF: 156 | if (instr->PRF.tag == vt) 157 | instr->PRF.tag = wt; 158 | return; 159 | 160 | // RECV instruction 161 | case RECV: 162 | if (instr->RECV.dest.tag == vt) 163 | instr->RECV.dest.tag = wt; 164 | return; 165 | 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /Lib/Target/Subst.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBST_H_ 2 | #define _SUBST_H_ 3 | 4 | #include "Target/Syntax.h" 5 | 6 | // Rename a destination register in an instruction 7 | void renameDest(Instr* instr, RegTag vt, RegId v, 8 | RegTag wt, RegId w); 9 | 10 | // Renamed a used register in an instruction 11 | void renameUses(Instr* instr, RegTag vt, RegId v, 12 | RegTag wt, RegId w); 13 | 14 | // Globally change register tag vt to wt in given instruction 15 | void substRegTag(Instr* instr, 
RegTag vt, RegTag wt); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /Lib/Target/Syntax.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Source/Syntax.h" 3 | #include "Target/Syntax.h" 4 | 5 | // ======= 6 | // Globals 7 | // ======= 8 | 9 | // Used for fresh label generation 10 | static int globalLabelId = 0; 11 | 12 | // ====================== 13 | // Handy syntax functions 14 | // ====================== 15 | 16 | // Determine if instruction is a conditional assignment 17 | bool isCondAssign(Instr* instr) 18 | { 19 | if (instr->tag == LI && instr->LI.cond.tag != ALWAYS) 20 | return true; 21 | if (instr->tag == ALU && instr->ALU.cond.tag != ALWAYS) 22 | return true; 23 | return false; 24 | } 25 | 26 | // Generate load-immediate instruction. 27 | 28 | Instr genLI(Reg dst, int i) 29 | { 30 | AssignCond always; 31 | always.tag = ALWAYS; 32 | 33 | Instr instr; 34 | instr.tag = LI; 35 | instr.LI.setFlags = false; 36 | instr.LI.cond = always; 37 | instr.LI.dest = dst; 38 | instr.LI.imm.tag = IMM_INT32; 39 | instr.LI.imm.intVal = i; 40 | 41 | return instr; 42 | } 43 | 44 | // Generate move instruction. 45 | 46 | Instr genMove(Reg dst, Reg src) 47 | { 48 | AssignCond always; 49 | always.tag = ALWAYS; 50 | 51 | Instr instr; 52 | instr.tag = ALU; 53 | instr.ALU.setFlags = false; 54 | instr.ALU.cond = always; 55 | instr.ALU.dest = dst; 56 | instr.ALU.srcA.tag = REG; 57 | instr.ALU.srcA.reg = src; 58 | instr.ALU.op = A_BOR; 59 | instr.ALU.srcB.tag = REG; 60 | instr.ALU.srcB.reg = src; 61 | 62 | return instr; 63 | } 64 | 65 | // Generate bitwise-or instruction. 
66 | 67 | Instr genOR(Reg dst, Reg srcA, Reg srcB) 68 | { 69 | AssignCond always; 70 | always.tag = ALWAYS; 71 | 72 | Instr instr; 73 | instr.tag = ALU; 74 | instr.ALU.setFlags = false; 75 | instr.ALU.cond = always; 76 | instr.ALU.dest = dst; 77 | instr.ALU.srcA.tag = REG; 78 | instr.ALU.srcA.reg = srcA; 79 | instr.ALU.op = A_BOR; 80 | instr.ALU.srcB.tag = REG; 81 | instr.ALU.srcB.reg = srcB; 82 | 83 | return instr; 84 | } 85 | 86 | // Generate left-shift instruction. 87 | 88 | Instr genLShift(Reg dst, Reg srcA, int n) 89 | { 90 | assert(n >= 0 && n <= 15); 91 | 92 | AssignCond always; 93 | always.tag = ALWAYS; 94 | 95 | Instr instr; 96 | instr.tag = ALU; 97 | instr.ALU.setFlags = false; 98 | instr.ALU.cond = always; 99 | instr.ALU.dest = dst; 100 | instr.ALU.srcA.tag = REG; 101 | instr.ALU.srcA.reg = srcA; 102 | instr.ALU.op = A_SHL; 103 | instr.ALU.srcB.tag = IMM; 104 | instr.ALU.srcB.smallImm.tag = SMALL_IMM; 105 | instr.ALU.srcB.smallImm.val = n; 106 | 107 | return instr; 108 | } 109 | 110 | // Generate increment instruction. 111 | 112 | Instr genIncr(Reg dst, Reg srcA, int n) 113 | { 114 | assert(n >= 0 && n <= 15); 115 | 116 | AssignCond always; 117 | always.tag = ALWAYS; 118 | 119 | Instr instr; 120 | instr.tag = ALU; 121 | instr.ALU.setFlags = false; 122 | instr.ALU.cond = always; 123 | instr.ALU.dest = dst; 124 | instr.ALU.srcA.tag = REG; 125 | instr.ALU.srcA.reg = srcA; 126 | instr.ALU.op = A_ADD; 127 | instr.ALU.srcB.tag = IMM; 128 | instr.ALU.srcB.smallImm.tag = SMALL_IMM; 129 | instr.ALU.srcB.smallImm.val = n; 130 | 131 | return instr; 132 | } 133 | 134 | // Is last instruction in a basic block? 
135 | bool isLast(Instr instr) 136 | { 137 | return instr.tag == BRL || instr.tag == BR || instr.tag == END; 138 | } 139 | 140 | // ========================= 141 | // Fresh variable generation 142 | // ========================= 143 | 144 | // Obtain a fresh variable 145 | Reg freshReg() 146 | { 147 | Var v = freshVar(); 148 | Reg r; 149 | r.tag = REG_A; 150 | r.regId = v.id; 151 | return r; 152 | } 153 | 154 | // Obtain a fresh label 155 | Label freshLabel() 156 | { 157 | return globalLabelId++; 158 | } 159 | 160 | // Number of fresh labels 161 | int getFreshLabelCount() 162 | { 163 | return globalLabelId; 164 | } 165 | 166 | // Reset fresh label generator 167 | void resetFreshLabelGen() 168 | { 169 | globalLabelId = 0; 170 | } 171 | 172 | // Reset fresh label generator to specified value 173 | void resetFreshLabelGen(int val) 174 | { 175 | globalLabelId = val; 176 | } 177 | -------------------------------------------------------------------------------- /Lib/Target/Syntax.h: -------------------------------------------------------------------------------- 1 | #ifndef _TARGET_SYNTAX_H_ 2 | #define _TARGET_SYNTAX_H_ 3 | 4 | #include 5 | 6 | // Syntax of the QPU target language. 7 | 8 | // This abstract syntax is a balance between a strict and relaxed 9 | // definition of the target language: 10 | // 11 | // a "strict" definition would allow only instructions that can run on 12 | // the target machine to be expressed, whereas a "relaxed" one allows 13 | // instructions that have no direct mapping to machine instructions. 14 | // 15 | // A relaxed definition allows the compilation process to be incremental: 16 | // after each pass, the target code gets closer to being executable, by 17 | // transforming away constructs that do not have a direct mapping to 18 | // hardware. However, we do not want to be too relaxed, otherwise we 19 | // loose scope for the type checker to help us. 
//
// For example, the definition below allows an instruction to read two
// operands from the *same* register file.  In fact, two operands must be
// taken from different register files in the target language.  It is the
// job of a compiler pass to enforce such a constraint.

// ============================================================================
// Sub-word selectors
// ============================================================================

// A sub-word selector allows a 32, 16, or 8-bit portion of each vector
// word to be selected.

enum SubWord {
  A8,    // Bits 7..0
  B8,    // Bits 15..8
  C8,    // Bits 23..16
  D8,    // Bits 31..24
  A16,   // Bits 15..0
  B16,   // Bits 31..16
  A32    // Bits 31..0
};

// ============================================================================
// Registers
// ============================================================================

typedef int RegId;

// Different kinds of registers
enum RegTag {
  REG_A,     // In register file A (0..31)
  REG_B,     // In register file B (0..31)
  ACC,       // Accumulator register
  SPECIAL,   // Special register
  NONE,      // No read/write
  TMP_A,     // Used in intermediate code
  TMP_B      // Used in intermediate code
};

// Does the tag denote a register in register file A or B?
inline bool isRegAorB(RegTag rt)
{
  switch (rt) {
    case REG_A:
    case REG_B:
      return true;
    default:
      return false;
  }
}

// Special registers
enum Special {
  // Read-only
  SPECIAL_UNIFORM,
  SPECIAL_ELEM_NUM,
  SPECIAL_QPU_NUM,
  SPECIAL_VPM_READ,

  // Write-only
  SPECIAL_RD_SETUP,
  SPECIAL_WR_SETUP,
  SPECIAL_DMA_ST_ADDR,
  SPECIAL_DMA_ST_WAIT,
  SPECIAL_DMA_LD_ADDR,
  SPECIAL_DMA_LD_WAIT,
  SPECIAL_VPM_WRITE,
  SPECIAL_HOST_INT,
  SPECIAL_TMU0_S
};

struct Reg {
  // What kind of register is it?
  RegTag tag;

  // Register identifier
  RegId regId;
};

// Two registers are equal when both their kind and identifier agree
inline bool operator==(Reg ra, Reg rb)
{
  if (ra.tag != rb.tag) return false;
  return ra.regId == rb.regId;
}

// ============================================================================
// Conditions
// ============================================================================

enum Flag {
  ZS,   // Zero set
  ZC,   // Zero clear
  NS,   // Negative set
  NC    // Negative clear
};

// Branch conditions

enum BranchCondTag {
  COND_ALL,      // Reduce vector of bits to a single
  COND_ANY,      // bit using AND/OR reduction
  COND_ALWAYS,
  COND_NEVER
};

struct BranchCond {
  // ALL or ANY reduction?
  BranchCondTag tag;

  // Condition flag
  Flag flag;
};

// Assignment conditions

enum AssignCondTag {
  NEVER,
  ALWAYS,
  FLAG
};

struct AssignCond {
  // Kind of assignment condition
  AssignCondTag tag;

  // Condition flag
  Flag flag;
};

// ============================================================================
// Immediates
// ============================================================================

// Different kinds of immediate
enum ImmTag {
  IMM_INT32,     // 32-bit word
  IMM_FLOAT32,   // 32-bit float
  IMM_MASK       // 1 bit per vector element (0 to 0xffff)
};

struct Imm {
  ImmTag tag;

  union {
    int intVal;
    float floatVal;
  };
};

// Different kinds of small immediates
enum SmallImmTag {
  SMALL_IMM,   // Small immediate
  ROT_ACC,     // Rotation amount taken from accumulator 5
  ROT_IMM      // Rotation amount 1..15
};

struct SmallImm {
  // What kind of small immediate is it?
  SmallImmTag tag;

  // Immediate value
  int val;
};

// A register or a small immediate operand?
enum RegOrImmTag { REG, IMM };

struct RegOrImm {
  // Register id or small immediate?
  RegOrImmTag tag;

  union {
    // A register
    Reg reg;

    // A small immediate
    SmallImm smallImm;
  };
};

// ============================================================================
// ALU operations
// ============================================================================

// ALU operators (for both the 'add' and the 'mul' ALU)
enum ALUOp {
  NOP,         // No op

  // Opcodes for the 'add' ALU
  A_FADD,      // Floating-point add
  A_FSUB,      // Floating-point subtract
  A_FMIN,      // Floating-point min
  A_FMAX,      // Floating-point max
  A_FMINABS,   // Floating-point min of absolute values
  A_FMAXABS,   // Floating-point max of absolute values
  A_FtoI,      // Float to signed integer
  A_ItoF,      // Signed integer to float
  A_ADD,       // Integer add
  A_SUB,       // Integer subtract
  A_SHR,       // Integer shift right
  A_ASR,       // Integer arithmetic shift right
  A_ROR,       // Integer rotate right
  A_SHL,       // Integer shift left
  A_MIN,       // Integer min
  A_MAX,       // Integer max
  A_BAND,      // Bitwise and
  A_BOR,       // Bitwise or
  A_BXOR,      // Bitwise xor
  A_BNOT,      // Bitwise not
  A_CLZ,       // Count leading zeros
  A_V8ADDS,    // Add with saturation per 8-bit element
  A_V8SUBS,    // Subtract with saturation per 8-bit element

  // Opcodes for the 'mul' ALU
  M_FMUL,      // Floating-point multiply
  M_MUL24,     // 24-bit integer multiply
  M_V8MUL,     // Multiply per 8-bit element
  M_V8MIN,     // Min per 8-bit element
  M_V8MAX,     // Max per 8-bit element
  M_V8ADDS,    // Add with saturation per 8-bit element
  M_V8SUBS,    // Subtract with saturation per 8-bit element
  M_ROTATE     // Rotation (intermediate op-code)
};

// Is the operator one of the 'mul' ALU opcodes?
inline bool isMulOp(ALUOp op)
{
  switch (op) {
    case M_FMUL:
    case M_MUL24:
    case M_V8MUL:
    case M_V8MIN:
    case M_V8MAX:
    case M_V8ADDS:
    case M_V8SUBS:
    case M_ROTATE:
      return true;
    default:
      return false;
  }
}

// ============================================================================
// Branch targets
// ============================================================================

struct BranchTarget {
  // Branch is absolute or relative to PC+4
  bool relative;

  // Plus value from register file A (optional)
  bool useRegOffset;
  RegId regOffset;

  // Plus 32-bit immediate value
  int immOffset;
};

// We allow labels for branching, represented by integer identifiers.  These
// will be translated to actual branch targets in a linking phase.

typedef int Label;

// ============================================================================
// Loads/store buffering
// ============================================================================

// We reserve two load buffers and two store buffers for each QPU in the VPM
// (shared local) memory.  The reason for two of each is to allow double
// buffering.  We refer to a double buffer as A and B buffers.
269 | 270 | enum BufferAorB { A, B }; 271 | 272 | // ============================================================================ 273 | // Instructions 274 | // ============================================================================ 275 | 276 | // QPU instruction tags 277 | enum InstrTag { 278 | LI // Load immediate 279 | , ALU // ALU operation 280 | , BR // Conditional branch to target 281 | , END // Program end (halt) 282 | 283 | // ================================================== 284 | // The remainder are intermediate-language constructs 285 | // ================================================== 286 | 287 | , BRL // Conditional branch to label 288 | , LAB // Label 289 | , NO_OP // No-op 290 | 291 | // Load instructions 292 | // ----------------- 293 | // 294 | // Four instructions are used to implement a memory load. 295 | 296 | , LD1 // First, DMA vector in DRAM into VPM (local) memory 297 | , LD2 // Second, wait for DMA completion 298 | , LD3 // Third, setup a read from VPM memory 299 | , LD4 // Fourth, transfer from VPM into given register 300 | 301 | // Rules for loads: 302 | // * An LD1 must be followed (eventually) by a corresponding LD2 303 | // * Ditto for LD3 and LD4 304 | // * There must be at least 3 instructions between an LD3 and an LD4 305 | // * An LD1/LD2 need not be followed by a corresponding LD3/LD4, 306 | // thus can be issued speculatively 307 | // * A new LD1 can be issued after an LD2, allowing double buffering 308 | 309 | // Store instructions 310 | // ------------------ 311 | // 312 | // Three instructions are required to perform a memory store. 313 | 314 | , ST1 // First, write the vector to VPM (local) memory. 315 | , ST2 // Second, DMA from the VPM out to DRAM. 316 | , ST3 // Third, wait for DMA to complete. 
317 | 318 | // Semaphores 319 | // ---------- 320 | 321 | , SINC // Increment semaphore 322 | , SDEC // Decrement semaphore 323 | 324 | // Send IRQ to host 325 | // ---------------- 326 | 327 | , IRQ 328 | 329 | // Load receive via TMU 330 | // -------------------- 331 | 332 | , RECV 333 | , TMU0_TO_ACC4 334 | 335 | // Print instructions 336 | // ------------------ 337 | 338 | , PRS // Print string 339 | , PRI // Print integer 340 | , PRF // Print float 341 | }; 342 | 343 | // QPU instructions 344 | struct Instr { 345 | // What kind of instruction is it? 346 | InstrTag tag; 347 | 348 | union { 349 | // Load immediate 350 | struct { bool setFlags; AssignCond cond; Reg dest; Imm imm; } LI; 351 | 352 | // ALU operation 353 | struct { bool setFlags; AssignCond cond; Reg dest; 354 | RegOrImm srcA; ALUOp op; RegOrImm srcB; } ALU; 355 | 356 | // Conditional branch (to target) 357 | struct { BranchCond cond; BranchTarget target; } BR; 358 | 359 | // ================================================== 360 | // The remainder are intermediate-language constructs 361 | // ================================================== 362 | 363 | // Conditional branch (to label) 364 | struct { BranchCond cond; Label label; } BRL; 365 | 366 | // Labels, denoting branch targets 367 | Label label; 368 | 369 | // Load instructions 370 | // ----------------- 371 | 372 | // DMA vector at address specifed by register from DRAM into VPM 373 | // (local) memory. To allow double buffering, i.e. the VPM to be 374 | // filled by DMA while also being read by a QPU, a flag is used to 375 | // indicate which one of two buffers in the VPM to use for the load 376 | struct { Reg addr; BufferAorB buffer; } LD1; 377 | 378 | // LD2 (wait for DMA read completion) has no parameters 379 | 380 | // Setup a read from VPM memory. 
A flag indicates which one of 381 | // two buffers in the VPM is being used for the load 382 | struct { BufferAorB buffer; } LD3; 383 | 384 | // Transfer from VPM into given register 385 | struct { Reg dest; } LD4; 386 | 387 | // Store instructions 388 | // ------------------ 389 | 390 | // Write the vector to VPM (local) memory using specified buffer 391 | struct { Reg data; BufferAorB buffer; } ST1; 392 | 393 | // DMA from the VPM out to DRAM at the address in given register. 394 | struct { Reg addr; BufferAorB buffer; } ST2; 395 | 396 | // ST3 (wait for DMA write completion) has no parameters 397 | 398 | // Semaphores 399 | // ---------- 400 | 401 | // Semaphore id (range 0..15) 402 | int semaId; 403 | 404 | // Load receive via TMU 405 | // -------------------- 406 | 407 | // Destination register for load receive 408 | struct { Reg dest; } RECV; 409 | 410 | // Print instructions 411 | // ------------------ 412 | 413 | // Print string 414 | const char* PRS; 415 | 416 | // Print integer 417 | Reg PRI; 418 | 419 | // Print float 420 | Reg PRF; 421 | }; 422 | }; 423 | 424 | // Instruction id: also the index of an instruction 425 | // in the main instruction sequence 426 | typedef int InstrId; 427 | 428 | // ============================================================================ 429 | // Handy functions 430 | // ============================================================================ 431 | 432 | // Determine if instruction is a conditional assignment 433 | bool isCondAssign(Instr* instr); 434 | 435 | // Make a no-op 436 | inline Instr nop() 437 | { Instr instr; instr.tag = NO_OP; return instr; } 438 | 439 | // Instruction constructors 440 | Instr genLI(Reg dst, int i); 441 | Instr genMove(Reg dst, Reg src); 442 | Instr genOR(Reg dst, Reg srcA, Reg srcB); 443 | Instr genLShift(Reg dst, Reg srcA, int n); 444 | Instr genIncr(Reg dst, Reg srcA, int n); 445 | 446 | // Is last instruction in a basic block? 
447 | bool isLast(Instr instr); 448 | 449 | // ========================= 450 | // Fresh variable generation 451 | // ========================= 452 | 453 | Reg freshReg(); 454 | 455 | // ====================== 456 | // Fresh label generation 457 | // ====================== 458 | 459 | // Obtain a fresh label 460 | Label freshLabel(); 461 | 462 | // Number of fresh labels used 463 | int getFreshLabelCount(); 464 | 465 | // Reset fresh label generator 466 | void resetFreshLabelGen(); 467 | void resetFreshLabelGen(int val); 468 | 469 | #endif 470 | -------------------------------------------------------------------------------- /Lib/VideoCore/Invoke.cpp: -------------------------------------------------------------------------------- 1 | #ifdef QPU_MODE 2 | 3 | #include "VideoCore/Invoke.h" 4 | #include "VideoCore/Mailbox.h" 5 | #include "VideoCore/VideoCore.h" 6 | 7 | #define QPU_TIMEOUT 10000 8 | 9 | void invoke( 10 | int numQPUs, 11 | SharedArray &codeMem, 12 | int qpuCodeMemOffset, 13 | Seq* params) 14 | { 15 | // Open mailbox for talking to VideoCore 16 | int mb = getMailbox(); 17 | 18 | // Number of 32-bit words needed for kernel code & parameters 19 | int numWords = qpuCodeMemOffset + (params->numElems+2)*numQPUs + 2*numQPUs; 20 | assert(numWords < codeMem.size); 21 | 22 | // Pointer to start of code 23 | uint32_t* qpuCodePtr = codeMem.getPointer(); 24 | 25 | // Copy parameters to instruction memory 26 | int offset = qpuCodeMemOffset; 27 | uint32_t** paramsPtr = new uint32_t* [numQPUs]; 28 | for (int i = 0; i < numQPUs; i++) { 29 | paramsPtr[i] = qpuCodePtr + offset; 30 | codeMem[offset++] = (uint32_t) i; // Unique QPU ID 31 | codeMem[offset++] = (uint32_t) numQPUs; // QPU count 32 | for (int j = 0; j < params->numElems; j++) 33 | codeMem[offset++] = params->elems[j]; 34 | } 35 | 36 | // Copy launch messages 37 | uint32_t* launchMsgsPtr = qpuCodePtr + offset; 38 | for (int i = 0; i < numQPUs; i++) { 39 | codeMem[offset++] = (uint32_t) paramsPtr[i]; 40 | 
codeMem[offset++] = (uint32_t) qpuCodePtr; 41 | } 42 | 43 | // Launch QPUs 44 | unsigned result = 45 | execute_qpu(mb, numQPUs, (uint32_t) launchMsgsPtr, 1, QPU_TIMEOUT); 46 | 47 | if (result != 0) { 48 | printf("Failed to invoke kernel on QPUs\n"); 49 | } 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /Lib/VideoCore/Invoke.h: -------------------------------------------------------------------------------- 1 | #ifdef QPU_MODE 2 | 3 | #ifndef _INVOKE_H_ 4 | #define _INVOKE_H_ 5 | 6 | #include "Common/Seq.h" 7 | #include "VideoCore/SharedArray.h" 8 | #include 9 | 10 | void invoke( 11 | int numQPUs, 12 | SharedArray &codeMem, 13 | int qpuCodeMemOffset, 14 | Seq* params); 15 | 16 | #endif 17 | #endif 18 | -------------------------------------------------------------------------------- /Lib/VideoCore/Mailbox.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2012, Broadcom Europe Ltd. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of the copyright holder nor the 13 | names of its contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "Mailbox.h" 39 | 40 | #define PAGE_SIZE (4*1024) 41 | 42 | void *mapmem(unsigned base, unsigned size) 43 | { 44 | int mem_fd; 45 | unsigned offset = base % PAGE_SIZE; 46 | base = base - offset; 47 | /* open /dev/mem */ 48 | if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) { 49 | printf("can't open /dev/mem\nThis program should be run as root. 
Try prefixing command with: sudo\n"); 50 | exit (-1); 51 | } 52 | void *mem = mmap( 53 | 0, 54 | size, 55 | PROT_READ|PROT_WRITE, 56 | MAP_SHARED/*|MAP_FIXED*/, 57 | mem_fd, 58 | base); 59 | #ifdef DEBUG 60 | printf("base=0x%x, mem=%p\n", base, mem); 61 | #endif 62 | if (mem == MAP_FAILED) { 63 | printf("mmap error %p\n", mem); 64 | exit (-1); 65 | } 66 | close(mem_fd); 67 | return (char *)mem + offset; 68 | } 69 | 70 | void unmapmem(void *addr, unsigned size) 71 | { 72 | int s = munmap(addr, size); 73 | if (s != 0) { 74 | printf("munmap error %d\n", s); 75 | exit (-1); 76 | } 77 | } 78 | 79 | /* 80 | * use ioctl to send mbox property message 81 | */ 82 | 83 | static int mbox_property(int file_desc, void *buf) 84 | { 85 | int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf); 86 | 87 | if (ret_val < 0) { 88 | printf("ioctl_set_msg failed:%d\n", ret_val); 89 | } 90 | 91 | #ifdef DEBUG 92 | unsigned *p = (unsigned*) buf; int i; unsigned size = *(unsigned *)buf; 93 | for (i=0; i 33 | 34 | #define MAJOR_NUM 100 35 | #define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) 36 | #define DEVICE_FILE_NAME "/dev/vcio" 37 | 38 | int mbox_open(); 39 | void mbox_close(int file_desc); 40 | 41 | unsigned get_version(int file_desc); 42 | unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags); 43 | unsigned mem_free(int file_desc, unsigned handle); 44 | unsigned mem_lock(int file_desc, unsigned handle); 45 | unsigned mem_unlock(int file_desc, unsigned handle); 46 | void *mapmem(unsigned base, unsigned size); 47 | void unmapmem(void *addr, unsigned size); 48 | 49 | unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); 50 | unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout); 51 | unsigned qpu_enable(int file_desc, unsigned enable); 52 | 53 | #define BUS_TO_PHYS(addr) (((addr)) & ~0xC0000000) 54 | 55 | #endif 56 | 
-------------------------------------------------------------------------------- /Lib/VideoCore/SharedArray.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHAREDARRAY_H_ 2 | #define _SHAREDARRAY_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "VideoCore/Mailbox.h" 8 | #include "VideoCore/VideoCore.h" 9 | 10 | #ifdef EMULATION_MODE 11 | 12 | // ============================================================================ 13 | // Emulation mode 14 | // ============================================================================ 15 | 16 | // When in EMULATION_MODE allocate memory from a pre-allocated pool. 17 | 18 | #include "Target/Emulator.h" 19 | 20 | // Implementation 21 | template class SharedArray { 22 | private: 23 | // Disallow assignment 24 | void operator=(SharedArray a); 25 | void operator=(SharedArray& a); 26 | 27 | public: 28 | 29 | uint32_t address; 30 | uint32_t size; 31 | 32 | // Allocation 33 | void alloc(uint32_t n) { 34 | if (emuHeap == NULL) { 35 | emuHeapEnd = 0; 36 | emuHeap = new int32_t [EMULATOR_HEAP_SIZE]; 37 | } 38 | if (emuHeapEnd+n >= EMULATOR_HEAP_SIZE) { 39 | printf("QPULib: heap overflow (increase EMULATOR_HEAP_SIZE)\n"); 40 | abort(); 41 | } 42 | else { 43 | address = emuHeapEnd; 44 | emuHeapEnd += n; 45 | size = n; 46 | } 47 | } 48 | 49 | // Constructor 50 | SharedArray(uint32_t n) { 51 | alloc(n); 52 | } 53 | 54 | uint32_t getAddress() { 55 | return address*4; 56 | } 57 | 58 | T* getPointer() { 59 | return (T*) &emuHeap[address]; 60 | } 61 | 62 | // Deallocation (does nothing in emulation mode) 63 | void dealloc() {} 64 | 65 | // Subscript 66 | T& operator[] (int i) { 67 | if (address+i >= EMULATOR_HEAP_SIZE) { 68 | printf("QPULib: accessing off end of heap\n"); 69 | exit(EXIT_FAILURE); 70 | } 71 | else 72 | return (T&) emuHeap[address+i]; 73 | } 74 | }; 75 | 76 | #else 77 | 78 | // ============================================================================ 79 | // Not 
emulation mode 80 | // ============================================================================ 81 | 82 | #define GPU_MEM_FLG 0xC // cached=0xC; direct=0x4 83 | #define GPU_MEM_MAP 0x0 // cached=0x0; direct=0x20000000 84 | 85 | template class SharedArray { 86 | private: 87 | // Disallow assignment & copying 88 | void operator=(SharedArray a); 89 | void operator=(SharedArray& a); 90 | SharedArray(const SharedArray& a); 91 | 92 | uint32_t handle; 93 | void* arm_base; 94 | void* gpu_base; 95 | 96 | public: 97 | uint32_t size; 98 | 99 | /* Allocate GPU memory and map it into ARM address space */ 100 | void alloc(uint32_t n) { 101 | // Mailbox, for talking to VideoCore 102 | int mb = getMailbox(); 103 | 104 | // Allocate memory 105 | handle = mem_alloc(mb, n*4, 4096, GPU_MEM_FLG); 106 | if (!handle) { 107 | fprintf(stderr, "Failed to allocate GPU memory."); 108 | exit(EXIT_FAILURE); 109 | } 110 | size = n; 111 | gpu_base = (void*) mem_lock(mb, handle); 112 | arm_base = mapmem(BUS_TO_PHYS((uint32_t) gpu_base+GPU_MEM_MAP), n*4); 113 | } 114 | 115 | // Constructor 116 | SharedArray() { 117 | size = handle = 0; 118 | arm_base = gpu_base = NULL; 119 | } 120 | 121 | // Constructor 122 | SharedArray(uint32_t n) { 123 | size = handle = 0; 124 | alloc(n); 125 | } 126 | 127 | uint32_t getAddress() { 128 | return (uint32_t) gpu_base; 129 | } 130 | 131 | T* getPointer() { 132 | return (T*) gpu_base; 133 | } 134 | 135 | // Deallocation 136 | void dealloc() { 137 | // Mailbox, for talking to VideoCore 138 | int mb = getMailbox(); 139 | 140 | // Free memory 141 | if (arm_base) unmapmem(arm_base, size); 142 | if (handle) { 143 | mem_unlock(mb, handle); 144 | mem_free(mb, handle); 145 | } 146 | size = handle = 0; 147 | gpu_base = NULL; 148 | arm_base = NULL; 149 | } 150 | 151 | // Subscript 152 | inline T& operator[] (int i) { 153 | uint32_t* base = (uint32_t*) arm_base; 154 | return (T&) base[i]; 155 | } 156 | 157 | // Destructor 158 | ~SharedArray() { 159 | if (arm_base != NULL) 
dealloc(); 160 | } 161 | }; 162 | 163 | #endif 164 | 165 | #endif 166 | -------------------------------------------------------------------------------- /Lib/VideoCore/VideoCore.cpp: -------------------------------------------------------------------------------- 1 | #ifdef QPU_MODE 2 | 3 | #include 4 | #include 5 | #include 6 | #include "VideoCore/VideoCore.h" 7 | #include "VideoCore/Mailbox.h" 8 | 9 | // Globals 10 | int mailbox = -1; 11 | int numQPUUsers = 0; 12 | 13 | // Get mailbox (open if not already opened) 14 | int getMailbox() 15 | { 16 | if (mailbox < 0) mailbox = mbox_open(); 17 | return mailbox; 18 | } 19 | 20 | // Enable QPUs (if not already enabled) 21 | void enableQPUs() 22 | { 23 | int mb = getMailbox(); 24 | if (numQPUUsers == 0) { 25 | int qpu_enabled = !qpu_enable(mb, 1); 26 | if (!qpu_enabled) { 27 | printf("Unable to enable QPUs. Check your firmware is latest."); 28 | exit(EXIT_FAILURE); 29 | } 30 | } 31 | numQPUUsers++; 32 | } 33 | 34 | // Disable QPUs 35 | void disableQPUs() 36 | { 37 | assert(numQPUUsers > 0); 38 | int mb = getMailbox(); 39 | numQPUUsers--; 40 | if (numQPUUsers == 0) { 41 | qpu_enable(mb, 0); 42 | } 43 | } 44 | 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /Lib/VideoCore/VideoCore.h: -------------------------------------------------------------------------------- 1 | #ifdef QPU_MODE 2 | 3 | #ifndef _VIDEOCORE_H_ 4 | #define _VIDEOCORE_H_ 5 | 6 | // Globals 7 | extern int mailbox; 8 | extern int numQPUUsers; 9 | 10 | // Operations 11 | int getMailbox(); 12 | void enableQPUs(); 13 | void disableQPUs(); 14 | 15 | #endif 16 | #endif 17 | -------------------------------------------------------------------------------- /Tests/AutoTest.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | #include "Common/Seq.h" 3 | #include "Source/Gen.h" 4 | #include "Source/Pretty.h" 5 | 6 | // 
============================================================================ 7 | // Program-generator options 8 | // ============================================================================ 9 | 10 | GenOptions basicGenOpts() 11 | { 12 | GenOptions opts; 13 | opts.depth = 3; 14 | opts.length = 4; 15 | opts.numIntArgs = 4; 16 | opts.numFloatArgs = 0; 17 | opts.numPtrArgs = 0; 18 | opts.numPtr2Args = 0; 19 | opts.numIntVars = 4; 20 | opts.numFloatVars = 0; 21 | opts.loopBound = 5; 22 | opts.genFloat = false; 23 | opts.genRotate = false; 24 | opts.genDeref = false; 25 | opts.genDeref2 = false; 26 | opts.derefOffsetMask = 0; 27 | opts.genStrided = false; 28 | return opts; 29 | } 30 | 31 | // ============================================================================ 32 | // Helpers 33 | // ============================================================================ 34 | 35 | void printCharSeq(Seq* s) 36 | { 37 | for (int i = 0; i < s->numElems; i++) 38 | printf("%c", s->elems[i]); 39 | } 40 | 41 | // ============================================================================ 42 | // Main 43 | // ============================================================================ 44 | 45 | int main() 46 | { 47 | // Seed random generator 48 | srand(0); 49 | 50 | // Basic options 51 | GenOptions opts = basicGenOpts(); 52 | 53 | const int numTests = 10000; 54 | for (int test = 0; test < numTests; test++) { 55 | astHeap.clear(); 56 | resetFreshLabelGen(); 57 | 58 | int numVars, numEmuVars; 59 | Stmt* s = progGen(&opts, &numVars); 60 | //pretty(s); 61 | 62 | Seq targetCode; 63 | resetFreshVarGen(numVars); 64 | compileKernel(&targetCode, s); 65 | numEmuVars = getFreshVarCount(); 66 | Seq params; 67 | params.clear(); 68 | for (int i = 0; i < opts.numIntArgs; i++) { 69 | params.append(genIntLit()); 70 | } 71 | 72 | Seq interpOut, emuOut; 73 | interpreter(1, s, numVars, ¶ms, &interpOut); 74 | emulate(1, &targetCode, numEmuVars, ¶ms, &emuOut); 75 | 76 | bool differs = false; 77 | if 
(interpOut.numElems != emuOut.numElems) 78 | differs = true; 79 | else { 80 | for (int i = 0; i < interpOut.numElems; i++) 81 | if (interpOut.elems[i] != emuOut.elems[i]) { differs = true; break; } 82 | } 83 | 84 | if (differs) { 85 | printf("Failed test %i.\n", test); 86 | pretty(s); 87 | printf("Params: "); 88 | for (int i = 0; i < params.numElems; i++) { 89 | printf("%i ", params.elems[i]); 90 | } 91 | printf("\nTarget emulator says:\n"); 92 | printCharSeq(&emuOut); 93 | printf("\nSource interpreter says:\n"); 94 | printCharSeq(&interpOut); 95 | printf("\n"); 96 | return 0; 97 | } 98 | else 99 | printf("%i\r", test); 100 | } 101 | printf("OK, passed %i tests\n", numTests); 102 | 103 | return 0; 104 | } 105 | -------------------------------------------------------------------------------- /Tests/GCD.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "QPULib.h" 3 | 4 | void gcd(Ptr p, Ptr q, Ptr r) 5 | { 6 | Int a = *p; 7 | Int b = *q; 8 | While (any(a != b)) 9 | Where (a > b) 10 | a = a-b; 11 | End 12 | Where (a < b) 13 | b = b-a; 14 | End 15 | End 16 | *r = a; 17 | } 18 | 19 | int main() 20 | { 21 | // Construct kernel 22 | auto k = compile(gcd); 23 | 24 | // Allocate and initialise arrays shared between ARM and GPU 25 | SharedArray a(16), b(16), r(16); 26 | srand(0); 27 | for (int i = 0; i < 16; i++) { 28 | a[i] = 100 + (rand() % 100); 29 | b[i] = 100 + (rand() % 100); 30 | } 31 | 32 | // Invoke the kernel and display the result 33 | k(&a, &b, &r); 34 | for (int i = 0; i < 16; i++) 35 | printf("gcd(%i, %i) = %i\n", a[i], b[i], r[i]); 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /Tests/HeatMap.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | // Heat dissapation constant 7 | #define K 0.25 8 | 9 | // 
============================================================================ 10 | // Vector version 11 | // ============================================================================ 12 | 13 | struct Cursor { 14 | Ptr addr; 15 | Float prev, current, next; 16 | 17 | void init(Ptr p) { 18 | gather(p); 19 | current = 0; 20 | addr = p+16; 21 | } 22 | 23 | void prime() { 24 | receive(next); 25 | gather(addr); 26 | } 27 | 28 | void advance() { 29 | addr = addr+16; 30 | prev = current; 31 | gather(addr); 32 | current = next; 33 | receive(next); 34 | } 35 | 36 | void finish() { 37 | receive(next); 38 | } 39 | 40 | void shiftLeft(Float& result) { 41 | result = rotate(current, 15); 42 | Float nextRot = rotate(next, 15); 43 | Where (index() == 15) 44 | result = nextRot; 45 | End 46 | } 47 | 48 | void shiftRight(Float& result) { 49 | result = rotate(current, 1); 50 | Float prevRot = rotate(prev, 1); 51 | Where (index() == 0) 52 | result = prevRot; 53 | End 54 | } 55 | }; 56 | 57 | void step(Ptr map, Ptr mapOut, Int pitch, Int width, Int height) 58 | { 59 | Cursor row[3]; 60 | map = map + pitch*me() + index(); 61 | 62 | // Skip first row of output map 63 | mapOut = mapOut + pitch; 64 | 65 | For (Int y = me(), y < height, y=y+numQPUs()) 66 | 67 | // Point p to the output row 68 | Ptr p = mapOut + y*pitch; 69 | 70 | // Initilaise three cursors for the three input rows 71 | for (int i = 0; i < 3; i++) row[i].init(map + i*pitch); 72 | for (int i = 0; i < 3; i++) row[i].prime(); 73 | 74 | // Compute one output row 75 | For (Int x = 0, x < width, x=x+16) 76 | 77 | for (int i = 0; i < 3; i++) row[i].advance(); 78 | 79 | Float left[3], right[3]; 80 | for (int i = 0; i < 3; i++) { 81 | row[i].shiftLeft(right[i]); 82 | row[i].shiftRight(left[i]); 83 | } 84 | 85 | Float sum = left[0] + row[0].current + right[0] + 86 | left[1] + right[1] + 87 | left[2] + row[2].current + right[2]; 88 | 89 | store(row[1].current - K * (row[1].current - sum * 0.125), p); 90 | p = p + 16; 91 | 92 | End 93 | 
94 | // Cursors are finished for this row 95 | for (int i = 0; i < 3; i++) row[i].finish(); 96 | 97 | // Move to the next input rows 98 | map = map + pitch*numQPUs(); 99 | 100 | End 101 | } 102 | 103 | // ============================================================================ 104 | // Main 105 | // ============================================================================ 106 | 107 | int main() 108 | { 109 | // Size of 2D heat map is WIDTH*HEIGHT: 110 | // * with zero padding, it is NROWS*NCOLS 111 | // * i.e. there is constant cold at the edges 112 | // * NCOLs should be a multiple of 16 113 | // * HEIGHT should be a multiple of NQPUS 114 | const int NQPUS = 1; 115 | const int WIDTH = 512-16; 116 | const int NCOLS = WIDTH+16; 117 | const int HEIGHT = 504; 118 | const int NROWS = HEIGHT+2; 119 | const int NSPOTS = 10; 120 | const int NSTEPS = 1500; 121 | 122 | // Timestamps 123 | timeval tvStart, tvEnd, tvDiff; 124 | 125 | // Allocate and initialise input and output maps 126 | SharedArray mapA(NROWS*NCOLS), mapB(NROWS*NCOLS); 127 | for (int y = 0; y < NROWS; y++) 128 | for (int x = 0; x < NCOLS; x++) { 129 | mapA[y*NCOLS+x] = 0; 130 | mapB[y*NCOLS+x] = 0; 131 | } 132 | 133 | // Inject hot spots 134 | srand(0); 135 | for (int i = 0; i < NSPOTS; i++) { 136 | int t = rand() % 256; 137 | int x = rand() % WIDTH; 138 | int y = 1 + rand() % HEIGHT; 139 | mapA[y*NCOLS+x] = (float) (1000*t); 140 | } 141 | 142 | // Compile kernel 143 | auto k = compile(step); 144 | 145 | // Invoke kernel 146 | k.setNumQPUs(NQPUS); 147 | gettimeofday(&tvStart, NULL); 148 | for (int i = 0; i < NSTEPS; i++) { 149 | if (i & 1) 150 | k(&mapB, &mapA, NCOLS, WIDTH, HEIGHT); 151 | else 152 | k(&mapA, &mapB, NCOLS, WIDTH, HEIGHT); 153 | } 154 | gettimeofday(&tvEnd, NULL); 155 | timersub(&tvEnd, &tvStart, &tvDiff); 156 | 157 | // Display results 158 | printf("P2\n%i %i\n255\n", WIDTH, HEIGHT); 159 | for (int y = 0; y < HEIGHT; y++) 160 | for (int x = 0; x < WIDTH; x++) { 161 | int t = (int) 
mapB[(y+1)*NCOLS+x]; 162 | t = t < 0 ? 0 : t; 163 | t = t > 255 ? 255 : t; 164 | printf("%d\n", t); 165 | } 166 | 167 | // Run-time of simulation 168 | printf("# %ld.%06lds\n", tvDiff.tv_sec, tvDiff.tv_usec); 169 | 170 | return 0; 171 | } 172 | -------------------------------------------------------------------------------- /Tests/HeatMapScalar.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // Heat dissapation constant 6 | #define K 0.25 7 | 8 | // ============================================================================ 9 | // Scalar version 10 | // ============================================================================ 11 | 12 | // One time step 13 | void step(float** map, float** mapOut, int width, int height) 14 | { 15 | for (int y = 1; y < height-1; y++) { 16 | for (int x = 1; x < width-1; x++) { 17 | float surroundings = 18 | map[y-1][x-1] + map[y-1][x] + map[y-1][x+1] + 19 | map[y][x-1] + map[y][x+1] + 20 | map[y+1][x-1] + map[y+1][x] + map[y+1][x+1]; 21 | surroundings *= 0.125; 22 | mapOut[y][x] = map[y][x] - (K * (map[y][x] - surroundings)); 23 | } 24 | } 25 | } 26 | 27 | // ============================================================================ 28 | // Main 29 | // ============================================================================ 30 | 31 | int main() 32 | { 33 | // Parameters 34 | const int WIDTH = 512; 35 | const int HEIGHT = 506; 36 | const int NSPOTS = 10; 37 | const int NSTEPS = 1500; 38 | 39 | // Timestamps 40 | timeval tvStart, tvEnd, tvDiff; 41 | 42 | // Allocate 43 | float* map = new float [WIDTH*HEIGHT]; 44 | float* mapOut = new float [WIDTH*HEIGHT]; 45 | float** map2D = new float* [HEIGHT]; 46 | float** mapOut2D = new float* [HEIGHT]; 47 | 48 | // Initialise 49 | for (int i = 0; i < WIDTH*HEIGHT; i++) map[i] = mapOut[i] = 0.0; 50 | for (int i = 0; i < HEIGHT; i++) { 51 | map2D[i] = &map[i*WIDTH]; 52 | mapOut2D[i] = &mapOut[i*WIDTH]; 53 | 
} 54 | 55 | // Inject hot spots 56 | srand(0); 57 | for (int i = 0; i < NSPOTS; i++) { 58 | int t = rand() % 256; 59 | int x = 1 + rand() % (WIDTH-2); 60 | int y = 1 + rand() % (HEIGHT-2); 61 | map2D[y][x] = (float) 1000*t; 62 | } 63 | 64 | // Simulate 65 | gettimeofday(&tvStart, NULL); 66 | for (int i = 0; i < NSTEPS; i++) { 67 | step(map2D, mapOut2D, WIDTH, HEIGHT); 68 | float** tmp = map2D; map2D = mapOut2D; mapOut2D = tmp; 69 | } 70 | gettimeofday(&tvEnd, NULL); 71 | timersub(&tvEnd, &tvStart, &tvDiff); 72 | 73 | // Display results 74 | printf("P2\n%i %i\n255\n", WIDTH, HEIGHT); 75 | for (int y = 0; y < HEIGHT; y++) 76 | for (int x = 0; x < WIDTH; x++) { 77 | int t = (int) map2D[y][x]; 78 | t = t < 0 ? 0 : t; 79 | t = t > 255 ? 255 : t; 80 | printf("%d\n", t); 81 | } 82 | 83 | // Run-time of simulation 84 | printf("# %ld.%06lds\n", tvDiff.tv_sec, tvDiff.tv_usec); 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /Tests/Hello.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | 3 | // Define function that runs on the GPU. 4 | 5 | void hello(Ptr p) 6 | { 7 | *p = 1; 8 | } 9 | 10 | int main() 11 | { 12 | // Construct kernel 13 | auto k = compile(hello); 14 | 15 | // Allocate and initialise array shared between ARM and GPU 16 | SharedArray array(16); 17 | for (int i = 0; i < 16; i++) 18 | array[i] = 100; 19 | 20 | // Invoke the kernel and display the result 21 | k(&array); 22 | for (int i = 0; i < 16; i++) { 23 | printf("%i: %i\n", i, array[i]); 24 | } 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /Tests/ID.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Define function that runs on the GPU. 

// Each QPU writes its own id into its own 16-element slice of the array
void hello(Ptr<Int> p)
{
  p = p + (me() << 4);
  *p = me();
}

int main()
{
  // Construct kernel
  auto k = compile(hello);

  // Allocate and initialise array shared between ARM and GPU
  SharedArray<int> array(192);
  for (int i = 0; i < 192; i++)
    array[i] = 0;

  // Invoke the kernel and display the result
  k.setNumQPUs(12);
  k(&array);
  for (int i = 0; i < 192; i++) {
    printf("%i: %i\n", i, array[i]);
  }

  return 0;
}

--------------------------------------------------------------------------------
/Tests/Makefile:
--------------------------------------------------------------------------------
# Root directory of QPULib repository
ROOT = ../Lib

# Compiler and default flags
CXX = g++
CXX_FLAGS = -fpermissive -Wconversion -std=c++0x -I $(ROOT)

# Object directory
OBJ_DIR = obj

# Debug mode
ifeq ($(DEBUG), 1)
  CXX_FLAGS += -DDEBUG
  OBJ_DIR := $(OBJ_DIR)-debug
endif

# QPU or emulation mode
ifeq ($(QPU), 1)
  CXX_FLAGS += -DQPU_MODE
  OBJ_DIR := $(OBJ_DIR)-qpu
else
  CXX_FLAGS += -DEMULATION_MODE
endif

# Object files
OBJ =                         \
  Kernel.o                    \
  Source/Syntax.o             \
  Source/Int.o                \
  Source/Float.o              \
  Source/Stmt.o               \
  Source/Pretty.o             \
  Source/Translate.o          \
  Source/Interpreter.o        \
  Source/Gen.o                \
  Target/Syntax.o             \
  Target/SmallLiteral.o       \
  Target/Pretty.o             \
  Target/RemoveLabels.o       \
  Target/CFG.o                \
  Target/Liveness.o           \
  Target/RegAlloc.o           \
  Target/ReachingDefs.o       \
  Target/Subst.o              \
  Target/LiveRangeSplit.o     \
  Target/Satisfy.o            \
  Target/LoadStore.o          \
  Target/Emulator.o           \
  Target/Encode.o             \
  VideoCore/Mailbox.o         \
  VideoCore/Invoke.o          \
  VideoCore/VideoCore.o

# Top-level targets

.PHONY: top clean

top:
	@echo Please supply a target to build, e.g. \'make GCD\'
	@echo

clean:
	rm -rf obj obj-debug obj-qpu obj-debug-qpu
	rm -f Tri GCD Print MultiTri AutoTest OET Hello ReqRecv Rot3D ID *.o
	rm -f HeatMap

LIB = $(patsubst %,$(OBJ_DIR)/%,$(OBJ))

Hello: Hello.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

ID: ID.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

Tri: Tri.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

Print: Print.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

GCD: GCD.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

AutoTest: AutoTest.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

MultiTri: MultiTri.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

OET: OET.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

ReqRecv: ReqRecv.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

Rot3D: Rot3D.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

HeatMap: HeatMap.o $(LIB)
	@echo Linking...
	@$(CXX) $^ -o $@ $(CXX_FLAGS)

# Intermediate targets

# The object directory is an order-only prerequisite (after the '|'): it must
# exist before compiling, but its timestamp, which changes whenever a file is
# created inside it, must not cause objects to be rebuilt.
$(OBJ_DIR)/%.o: $(ROOT)/%.cpp | $(OBJ_DIR)
	@echo Compiling $<
	@$(CXX) -c -o $@ $< $(CXX_FLAGS)

%.o: %.cpp
	@echo Compiling $<
	@$(CXX) -c -o $@ $< $(CXX_FLAGS)

$(OBJ_DIR):
	@mkdir -p $(OBJ_DIR)
	@mkdir -p $(OBJ_DIR)/Source
	@mkdir -p $(OBJ_DIR)/Target
	@mkdir -p $(OBJ_DIR)/VideoCore

--------------------------------------------------------------------------------
/Tests/MultiTri.cpp:
--------------------------------------------------------------------------------
#include "QPULib.h"

// Kernel: each QPU replaces every n in its own 16-element slice of the
// array by the sum n + (n-1) + ... + 1
void tri(Ptr<Int> p)
{
  p = p + (me() << 4);
  Int n = *p;
  Int sum = 0;
  While (any(n > 0))
    Where (n > 0)
      sum = sum+n;
      n = n-1;
    End
  End
  *p = sum;
}

int main()
{
  // Construct kernel
  auto k = compile(tri);

  // Use 4 QPUs
  k.setNumQPUs(4);

  // Allocate and initialise array shared between ARM and GPU
  SharedArray<int> array(64);
  for (int i = 0; i < 64; i++)
    array[i] = i;

  // Invoke the kernel and display the result
  k(&array);
  for (int i = 0; i < 64; i++)
    printf("%i: %i\n", i, array[i]);

  return 0;
}

--------------------------------------------------------------------------------
/Tests/OET.cpp:
--------------------------------------------------------------------------------
#include "QPULib.h"

// Odd/even transposition sorter for a 32-element array

void oet(Ptr p)
{
  setReadStride(1);
  setWriteStride(1);

  Int evens = *p;
  Int odds  = *(p+1);

  For (Int count = 0, count < 16, count++)
    Int evens2 = min(evens, odds);
    Int odds2  = max(evens, odds);

    Int evens3 = rotate(evens2, 15);
    Int odds3  = odds2;

    Where (index() != 15)
      odds2 = min(evens3, odds3);
    End

    Where (index() != 0)
      evens2 =
rotate(max(evens3, odds3), 1); 26 | End 27 | 28 | evens = evens2; 29 | odds = odds2; 30 | End 31 | 32 | *p = evens; 33 | *(p+1) = odds; 34 | } 35 | 36 | int main() 37 | { 38 | // Construct kernel 39 | auto k = compile(oet); 40 | 41 | // Allocate and initialise array shared between ARM and GPU 42 | SharedArray a(32); 43 | for (int i = 0; i < 32; i++) 44 | a[i] = 100-i; 45 | 46 | // Invoke the kernel and display the result 47 | k.call(&a); 48 | for (int i = 0; i < 32; i++) 49 | printf("%i: %i\n", i, a[i]); 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /Tests/Print.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | 3 | void loop(Int n) 4 | { 5 | For (Int i = 0, i < n, i++) 6 | Print(i); 7 | Print("\n"); 8 | End 9 | } 10 | 11 | int main() 12 | { 13 | // Construct kernel 14 | auto k = compile(loop); 15 | 16 | // Invoke kernel with argument 20 17 | k(20); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /Tests/ReqRecv.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | 3 | // Define function that runs on the GPU. 
4 | 5 | void test(Ptr p) 6 | { 7 | Int x, y; 8 | gather(p+index()); 9 | gather(p+16+index()); 10 | receive(x); 11 | receive(y); 12 | *p = x+y; 13 | } 14 | 15 | int main() 16 | { 17 | // Construct kernel 18 | auto k = compile(test); 19 | 20 | // Allocate and initialise array shared between ARM and GPU 21 | SharedArray array(32); 22 | for (int i = 0; i < 32; i++) 23 | array[i] = i; 24 | 25 | // Invoke the kernel and display the result 26 | k(&array); 27 | for (int i = 0; i < 16; i++) 28 | printf("%i: %i\n", i, array[i]); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /Tests/Rot3D.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // #define USE_SCALAR_VERSION 6 | 7 | // ============================================================================ 8 | // Scalar version 9 | // ============================================================================ 10 | 11 | void rot3D(int n, float cosTheta, float sinTheta, float* x, float* y) 12 | { 13 | for (int i = 0; i < n; i++) { 14 | float xOld = x[i]; 15 | float yOld = y[i]; 16 | x[i] = xOld * cosTheta - yOld * sinTheta; 17 | y[i] = yOld * cosTheta + xOld * sinTheta; 18 | } 19 | } 20 | 21 | // ============================================================================ 22 | // Vector version 1 23 | // ============================================================================ 24 | 25 | void rot3D_1(Int n, Float cosTheta, Float sinTheta, Ptr x, Ptr y) 26 | { 27 | For (Int i = 0, i < n, i = i+16) 28 | Float xOld = x[i]; 29 | Float yOld = y[i]; 30 | x[i] = xOld * cosTheta - yOld * sinTheta; 31 | y[i] = yOld * cosTheta + xOld * sinTheta; 32 | End 33 | } 34 | 35 | // ============================================================================ 36 | // Vector version 2 37 | // ============================================================================ 38 | 39 | void rot3D_2(Int n, 
Float cosTheta, Float sinTheta, Ptr x, Ptr y) 40 | { 41 | Int inc = 16; 42 | Ptr p = x + index(); 43 | Ptr q = y + index(); 44 | gather(p); gather(q); 45 | 46 | Float xOld, yOld; 47 | For (Int i = 0, i < n, i = i+inc) 48 | gather(p+inc); gather(q+inc); 49 | receive(xOld); receive(yOld); 50 | store(xOld * cosTheta - yOld * sinTheta, p); 51 | store(yOld * cosTheta + xOld * sinTheta, q); 52 | p = p+inc; q = q+inc; 53 | End 54 | 55 | receive(xOld); receive(yOld); 56 | } 57 | 58 | // ============================================================================ 59 | // Vector version 3 60 | // ============================================================================ 61 | 62 | void rot3D_3(Int n, Float cosTheta, Float sinTheta, Ptr x, Ptr y) 63 | { 64 | Int inc = numQPUs() << 4; 65 | Ptr p = x + index() + (me() << 4); 66 | Ptr q = y + index() + (me() << 4); 67 | gather(p); gather(q); 68 | 69 | Float xOld, yOld; 70 | For (Int i = 0, i < n, i = i+inc) 71 | gather(p+inc); gather(q+inc); 72 | receive(xOld); receive(yOld); 73 | store(xOld * cosTheta - yOld * sinTheta, p); 74 | store(yOld * cosTheta + xOld * sinTheta, q); 75 | p = p+inc; q = q+inc; 76 | End 77 | 78 | receive(xOld); receive(yOld); 79 | } 80 | 81 | // ============================================================================ 82 | // Main 83 | // ============================================================================ 84 | 85 | int main() 86 | { 87 | // Timestamps 88 | timeval tvStart, tvEnd, tvDiff; 89 | 90 | // Number of vertices and angle of rotation 91 | const int N = 19200; // 192000 92 | const float THETA = (float) 3.14159; 93 | 94 | #ifdef USE_SCALAR_VERSION 95 | // Allocate and initialise 96 | float* x = new float [N]; 97 | float* y = new float [N]; 98 | for (int i = 0; i < N; i++) { 99 | x[i] = (float) i; 100 | y[i] = (float) i; 101 | } 102 | #else 103 | // Construct kernel 104 | auto k = compile(rot3D_3); 105 | 106 | // Use 12 QPUs 107 | k.setNumQPUs(12); 108 | 109 | // Allocate and initialise 
arrays shared between ARM and GPU 110 | SharedArray x(N), y(N); 111 | for (int i = 0; i < N; i++) { 112 | x[i] = (float) i; 113 | y[i] = (float) i; 114 | } 115 | #endif 116 | 117 | gettimeofday(&tvStart, NULL); 118 | #ifdef USE_SCALAR_VERSION 119 | rot3D(N, cosf(THETA), sinf(THETA), x, y); 120 | #else 121 | k(N, cosf(THETA), sinf(THETA), &x, &y); 122 | #endif 123 | gettimeofday(&tvEnd, NULL); 124 | timersub(&tvEnd, &tvStart, &tvDiff); 125 | 126 | // Display results 127 | //for (int i = 0; i < N; i++) 128 | // printf("%f %f\n", x[i], y[i]); 129 | 130 | printf("%ld.%06lds\n", tvDiff.tv_sec, tvDiff.tv_usec); 131 | 132 | return 0; 133 | } 134 | -------------------------------------------------------------------------------- /Tests/Sort.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | 3 | void oet(Ptr p) 4 | { 5 | setReadStride(1); 6 | setWriteStride(1); 7 | 8 | Int evens = *p; 9 | Int odds = *(p+1); 10 | 11 | For (Int count = 0, count < 16, count++) 12 | Int evens2 = min(evens, odds); 13 | Int odds2 = max(evens, odds); 14 | 15 | Int evens3 = rotate(evens2, 15); 16 | Int odds3 = odds2; 17 | 18 | Where (index() != 15) 19 | odds2 = min(evens3, odds3); 20 | End 21 | 22 | Where (index() != 0) 23 | evens2 = rotate(max(evens3, odds3), 1); 24 | End 25 | 26 | evens = evens2; 27 | odds = odds2; 28 | End 29 | 30 | *p = evens; 31 | *(p+1) = odds; 32 | } 33 | 34 | int main() 35 | { 36 | // Allocate and initialise array shared between ARM and GPU 37 | auto k = compile(oet); 38 | 39 | // Allocate some data for the kernel to process 40 | SharedArray a(32); 41 | for (int i = 0; i < 32; i++) 42 | a[i] = 100-i; 43 | 44 | // Invoke the kernel and display the result 45 | k(&a); 46 | for (int i = 0; i < 32; i++) 47 | printf("%i: %i\n", i, a[i]); 48 | 49 | return 0; 50 | } 51 | 52 | /* 53 | 54 | Periodic sorter 55 | =============== 56 | 57 | // Implementation of periodic sorter from: 58 | // 59 | // Design and verification of a 
sorter core 60 | // K. Claessen, M. Sheeran, S. Singh 61 | // 62 | // The 'qfly' network is easy to vectorise due to its simple structure, however 63 | // MergeSort.c should be quicker although slightly more complicated. 64 | 65 | #include 66 | #include 67 | #include 68 | 69 | // Sort 2^N numbers 70 | #define N 20 71 | 72 | // All compare-and-swaps in a pass can run in parallel 73 | void pass(int n, int* a, int s0, int s1, int stride) 74 | { 75 | int end = 1 << n; 76 | while (s1 < end) { 77 | if (a[s0] > a[s1]) { 78 | int tmp = a[s0]; 79 | a[s0] = a[s1]; 80 | a[s1] = tmp; 81 | } 82 | s0 += stride; 83 | s1 += stride; 84 | } 85 | } 86 | 87 | void sort(int n, int* a) 88 | { 89 | for (int i = 0; i < n; i++) { 90 | pass(n, a, 0, 1, 2); 91 | for (int j = 1 << (n-1); j > 1; j >>= 1) 92 | pass(n, a, 1, j, 2); 93 | } 94 | } 95 | 96 | int main() 97 | { 98 | // Create random array 99 | srand(time(NULL)); 100 | int* a = new int [1 << N]; 101 | for (int i = 0; i < 1 << N; i++) 102 | a[i] = rand() % 65536; 103 | 104 | // Sort it 105 | sort(N, a); 106 | 107 | // Check that it's sorted 108 | bool sorted = true; 109 | for (int i = 1; i < (1 << N); i++) 110 | sorted = sorted && (a[i-1] <= a[i]); 111 | printf("sorted = %s\n", sorted ? "true" : "false"); 112 | } 113 | 114 | Merge sorter 115 | ============ 116 | 117 | // An implementation of Batcher's odd/even merge sort. This should be 118 | // straightforward to vectorise, and it should allow a hybrid approach 119 | // in which an odd/even transposition sort is used to sort small blocks 120 | // of elements quickly before merging the results.
121 | 122 | #include 123 | #include 124 | #include 125 | 126 | // Sort 2^N numbers 127 | #define N 20 128 | 129 | // All compare-and-swaps in a pass can run in parallel 130 | void pass(int n, int* a, int s0, int s1) 131 | { 132 | int count = 1; 133 | int g = s0; 134 | while (s1 < n) { 135 | if (a[s0] > a[s1]) { 136 | int tmp = a[s0]; 137 | a[s0] = a[s1]; 138 | a[s1] = tmp; 139 | } 140 | if (count == g) { 141 | count = 1; 142 | s0 += g+1; 143 | s1 += g+1; 144 | } 145 | else { 146 | s0++; 147 | s1++; 148 | count++; 149 | } 150 | } 151 | } 152 | 153 | void merge(int n, int* a) { 154 | pass(n, a, 0, n>>1); 155 | for (int i = n; i > 1; i = i >> 1) 156 | pass(n, a, i>>1, i); 157 | } 158 | 159 | void sort(int n, int* a) 160 | { 161 | for (int i = 2; i <= n; i = i << 1) 162 | // All merges in this loop can run in parallel 163 | for (int start = 0; start < n; start += i) 164 | merge(i, &a[start]); 165 | } 166 | 167 | int main() 168 | { 169 | // Create random array 170 | srand(time(NULL)); 171 | int* a = new int [1 << N]; 172 | for (int i = 0; i < 1 << N; i++) 173 | a[i] = rand() % 65536; 174 | 175 | // Sort it 176 | sort(1< p) 6 | { 7 | Int n = *p; 8 | Int sum = 0; 9 | While (any(n > 0)) 10 | Where (n > 0) 11 | sum = sum+n; 12 | n = n-1; 13 | End 14 | End 15 | *p = sum; 16 | } 17 | 18 | int main() 19 | { 20 | // Construct kernel 21 | auto k = compile(tri); 22 | 23 | // Allocate and initialise array shared between ARM and GPU 24 | SharedArray array(16); 25 | for (int i = 0; i < 16; i++) 26 | array[i] = i; 27 | 28 | // Invoke the kernel and display the result 29 | k(&array); 30 | for (int i = 0; i < 16; i++) 31 | printf("%i: %i\n", i, array[i]); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /Tests/TriFloat.cpp: -------------------------------------------------------------------------------- 1 | #include "QPULib.h" 2 | 3 | // Define function that runs on the GPU. 
4 | 5 | void tri(Ptr p) 6 | { 7 | Int n = toInt(*p); 8 | Int sum = 0; 9 | While (any(n > 0)) 10 | Where (n > 0) 11 | sum = sum+n; 12 | n = n-1; 13 | End 14 | End 15 | *p = toFloat(sum); 16 | } 17 | 18 | int main() 19 | { 20 | // Construct kernel 21 | auto k = compile(tri); 22 | 23 | // Allocate and initialise array shared between ARM and GPU 24 | SharedArray array(16); 25 | for (int i = 0; i < 16; i++) 26 | array[i] = (float) i; 27 | 28 | // Invoke the kernel and display the result 29 | k(&array); 30 | for (int i = 0; i < 16; i++) 31 | printf("%i: %f\n", i, array[i]); 32 | 33 | return 0; 34 | } 35 | --------------------------------------------------------------------------------