├── docs ├── pipeline.jpg ├── masterworker.png ├── decomposition.jpg ├── split_pipeline.jpg ├── parallel_pipeline.jpg ├── SciPy-20-landscape-v1d6.pdf ├── installupgrade.md ├── tutorial1.md ├── tutorial5.md ├── tutorial6.md ├── tutorial7.md ├── tutorial2.md ├── tutorial4.md └── tutorial3.md ├── modules ├── memory.py ├── random.py ├── concurrency.py ├── coprocessor.py ├── util.py ├── math.py ├── parallel.py ├── array.py └── taskfarm.py ├── examples ├── simpleplus.py ├── hello.py ├── p2pcomm.py ├── input.py ├── coreidentity.py ├── broadcast.py ├── reduction.py ├── loops.py ├── functions.py ├── arrays.py ├── synccores.py ├── pi_offload.py ├── controlflow.py ├── mandlebrot.py ├── odd-even-sort.py ├── task_farm_example.py ├── task_farm_pi.py ├── pi.py ├── haloswap.py ├── jacobi_offload.py ├── pipeline.py ├── parallel-odd-even-sort.py ├── split_pipeline.py ├── jacobi.py ├── gauss-seidel.py ├── mergesort.py └── parallel_pipeline.py ├── .gitignore ├── device ├── makefile ├── main.h └── main.c ├── epython.sh ├── host ├── host-functions.h ├── makefile ├── stack.h ├── python_interoperability.h ├── misc.h ├── configuration.h ├── device-support.h ├── memorymanager.h ├── stack.c ├── misc.c ├── parser.h ├── epython.l ├── byteassembler.h ├── epython.y ├── configuration.c └── memorymanager.c ├── LICENCE ├── makefile ├── shared.h ├── interpreter ├── functions.h ├── interpreter.h └── basictokens.h └── README.md /docs/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/pipeline.jpg -------------------------------------------------------------------------------- /docs/masterworker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/masterworker.png -------------------------------------------------------------------------------- /docs/decomposition.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/decomposition.jpg -------------------------------------------------------------------------------- /docs/split_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/split_pipeline.jpg -------------------------------------------------------------------------------- /docs/parallel_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/parallel_pipeline.jpg -------------------------------------------------------------------------------- /modules/memory.py: -------------------------------------------------------------------------------- 1 | def free(a): 2 | native rtl_free(a) 3 | 4 | def gc(): 5 | native rtl_gc() 6 | -------------------------------------------------------------------------------- /docs/SciPy-20-landscape-v1d6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/SciPy-20-landscape-v1d6.pdf -------------------------------------------------------------------------------- /examples/simpleplus.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple example of integer and real addition 3 | To run: epython simpleplus.py 4 | */ 5 | 6 | a=12 7 | b=2.3 8 | print a 9 | a=a+b 10 | print b 11 | -------------------------------------------------------------------------------- /examples/hello.py: -------------------------------------------------------------------------------- 1 | /* 2 | A simple hello world with string assignment and concatenation 3 | To run: epython hello.py 4 | */ 5 | 6 | a="hello" 7 | b="world" 8 | c=a+" "+b 9 | print c 10 | 
-------------------------------------------------------------------------------- /modules/random.py: -------------------------------------------------------------------------------- 1 | def randint(a, b): 2 | return (native rtl_math(14) % (b-a)) + a 3 | 4 | def randrange(a): 5 | return native rtl_math(14) % a 6 | 7 | def random(): 8 | return native rtl_math(14) % 20000 / 20001.0 9 | -------------------------------------------------------------------------------- /modules/concurrency.py: -------------------------------------------------------------------------------- 1 | def expose(data, pid): 2 | global_ref=native rtl_global_reference(id(data)) 3 | native rtl_send(global_ref,pid) 4 | 5 | def access(src): 6 | ref=native rtl_recv(src) 7 | return native rtl_dereference(ref) -------------------------------------------------------------------------------- /examples/p2pcomm.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of P2P blocking send and receives from for 0 to core 1. 
3 | To run: epython p2pcomm.py 4 | */ 5 | 6 | from parallel import * 7 | 8 | if coreid()==0: 9 | send(20, 1) 10 | elif coreid()==1: 11 | print "Got value "+str(recv(0))+" from core 0" 12 | -------------------------------------------------------------------------------- /examples/input.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of input, note that there will be an input for each core (and the value input for that core reported) so you might want to run with only 1 or 2 cores 3 | To run: epython -c 0 input (to run on core 0 only) 4 | */ 5 | 6 | a=input("Enter your name: ") 7 | print "Hello "+a 8 | -------------------------------------------------------------------------------- /examples/coreidentity.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of printing, core id and conditional statements 3 | To run: epython coreidentity.py 4 | */ 5 | 6 | from parallel import * 7 | 8 | print "Hello world from core "+str(coreid()) 9 | 10 | if coreid()==5: 11 | print "Hello only from core "+str(coreid()) 12 | 13 | -------------------------------------------------------------------------------- /examples/broadcast.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of broadcasting a value from one core (0 here) to each other and displaying the result 3 | To run: epython broadcast.py 4 | */ 5 | 6 | from parallel import * 7 | from random import randrange 8 | 9 | a=bcast(randrange(100), 0) 10 | print "The random number from core 0 is "+str(a) 11 | -------------------------------------------------------------------------------- /examples/reduction.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of global reduction across all cores, here we find the maximum random number - can also do min, sum and prod 3 | To run: epython reduction.py 4 | */ 5 | 
6 | from parallel import * 7 | from random import randrange 8 | 9 | a=reduce(randrange(100), "max") 10 | print "The highest random number is "+str(a) 11 | -------------------------------------------------------------------------------- /examples/loops.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of loops 3 | To run: epython loops.py 4 | */ 5 | 6 | from util import * 7 | 8 | for x in range(10): 9 | print "X="+str(x) 10 | 11 | for x in range(105,115): 12 | print "X="+str(x) 13 | 14 | list=[10,20,30,40,50,60] 15 | for x in list: 16 | print "List item="+str(x) 17 | 18 | i=10 19 | while i<=20: 20 | print "I="+str(i) 21 | i+=1 22 | -------------------------------------------------------------------------------- /examples/functions.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of defining functions, note how in anotherfn we provide default values incase the user does not specify them 3 | To run: epython functions.py 4 | */ 5 | 6 | def fn(a,b): 7 | print a+b 8 | 9 | def anotherfn(a=10, b=20): 10 | return a+b 11 | 12 | 13 | fn(1,2) 14 | fn("hello ", "world") 15 | 16 | print anotherfn() 17 | print anotherfn(2) 18 | print anotherfn(2,4) -------------------------------------------------------------------------------- /examples/arrays.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of arrays, getting a random number and printing values out. 
Dim will pop the array in core local memory, you can specify shared memory via sdim 3 | To run: epython arrays.py 4 | */ 5 | 6 | from random import randrange 7 | 8 | a=[0]*100 9 | i=0 10 | while i<100: 11 | a[i]=i 12 | i+=1 13 | r=randrange(100) 14 | print "Random index is "+str(r)+" value is "+str(a[r]) 15 | -------------------------------------------------------------------------------- /examples/synccores.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustrates the synchronisation across all cores, all cores display the hello message,wait and then the after message once othe cores have 3 | caught up. Comment out the sync line and rerun to see the messages more interleaved 4 | To run: epython synccores.py 5 | */ 6 | from parallel import * 7 | 8 | print "Hello from core "+str(coreid()) 9 | sync() 10 | print "After sync from core "+str(coreid()) 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.d 4 | *.ko 5 | *.obj 6 | *.elf 7 | *.cbp 8 | *.layout 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Libraries 15 | *.lib 16 | *.a 17 | *.la 18 | *.lo 19 | 20 | # Shared objects (inc. 
Windows DLLs) 21 | *.dll 22 | *.so 23 | *.so.* 24 | *.dylib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | *.i*86 31 | *.x86_64 32 | *.hex 33 | 34 | # Eclipse 35 | .project 36 | .cproject 37 | 38 | /Debug/ 39 | -------------------------------------------------------------------------------- /examples/pi_offload.py: -------------------------------------------------------------------------------- 1 | from epython import offload 2 | 3 | @offload 4 | def findPI(darts, rounds): 5 | from random import random 6 | from math import pow 7 | 8 | mypi=0.0 9 | i=1 10 | while i<=rounds: 11 | score=0.0 12 | j=1 13 | while j<=darts: 14 | x=random() 15 | y=random() 16 | if (pow(x,2) + pow(y,2) < 1.0): 17 | score+=1 18 | j+=1 19 | mypi=mypi+4.0 * (score/darts) 20 | i+=1 21 | return mypi 22 | 23 | pi=sum(findPI(100,10)) 24 | print "Value of PI is "+str((pi/10)/16) 25 | -------------------------------------------------------------------------------- /examples/controlflow.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustrates control flow, you can limit the cores run with the -c argument 3 | i.e. epython controlflow.py will run on all cores 4 | i.e. epython -c 1 controlflow.py will run on core 1 only 5 | i.e. epython -c 1,2,3,4,9 controlflow.py will run on cores 1,2,3,4 and 9 6 | i.e. 
epython -c 1:7 controlflow.py will run on cores 1 to 7 inclusive 7 | */ 8 | 9 | 10 | from parallel import * 11 | 12 | if coreid()==0 or coreid()==1: 13 | print "Core id is 0 or 1" 14 | elif coreid()==2: 15 | print "Core id is 2" 16 | else: 17 | print "Core id is not 0, 1 or 2" 18 | -------------------------------------------------------------------------------- /examples/mandlebrot.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple mandlbrot example, based on a version by Mike Bell 3 | */ 4 | 5 | from parallel import * 6 | from util import * 7 | 8 | outstr="" 9 | 10 | x=-2.0 11 | y=-1.0 + (coreid()*0.125) 12 | 13 | i=0 14 | while i<=66: 15 | re = x 16 | im = y 17 | j=0 18 | while j<=20: 19 | re_next = re*re - im*im + x 20 | im = 2*re*im + y 21 | re=re_next 22 | j+=1 23 | if re*re + im*im < 4: 24 | outstr = outstr+"#" 25 | else: 26 | outstr = outstr+" " 27 | x+=0.05 28 | i+=1 29 | 30 | for i in range(16): 31 | if coreid()==i: 32 | print outstr 33 | sync() 34 | 35 | -------------------------------------------------------------------------------- /examples/odd-even-sort.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple odd-even sort 3 | */ 4 | 5 | from util import * 6 | from random import randrange 7 | from array import len, array 8 | 9 | x=array(100) 10 | for i in range(99): 11 | x[i]=randrange(100) 12 | 13 | sorted=false 14 | while not sorted: 15 | sorted=true 16 | i=0 17 | while i x[i+1]: 19 | temp= x[i] 20 | x[i]=x[i+1] 21 | x[i+1] = temp 22 | sorted=false 23 | i+=2 24 | i=1 25 | while i x[i+1]: 27 | temp= x[i] 28 | x[i]=x[i+1] 29 | x[i+1] = temp 30 | sorted=false 31 | i+=2 32 | 33 | for i in x: 34 | print i -------------------------------------------------------------------------------- /device/makefile: -------------------------------------------------------------------------------- 1 | CC=e-gcc 2 | CFLAGS=-I ../ -I ../interpreter -Os -fno-exceptions 
-freg-struct-return -fno-default-inline 3 | LDFLAGS=-T linker.ldf -Wl,--gc-sections 4 | 5 | all: clean epython-device.elf 6 | epython-device.elf: main.o device-functions.o ../interpreter/interpreter.o 7 | bins = epython-device.elf 8 | 9 | .PHONE: check 10 | 11 | %.o : %.c 12 | $(CC) $(CFLAGS) -MMD -o $@ -c $< 13 | 14 | $(bins) : 15 | $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -le-lib 16 | e-objcopy --srec-forceS3 --output-target srec epython-device.elf epython-device.srec 17 | 18 | clean: 19 | $(RM) -f -v $(bins) *.yy.[ch] *.tab.[ch] *.o *.d *.output ../interpreter/*.o 20 | 21 | -include *.d 22 | -------------------------------------------------------------------------------- /examples/task_farm_example.py: -------------------------------------------------------------------------------- 1 | import taskfarm 2 | 3 | initTaskFarm(0) 4 | 5 | if (coreid() == 0): 6 | execFunction(1, exampleFn, 35) 7 | arr=[0]*10 8 | i=0 9 | while i x[i+1]: 37 | temp= x[i] 38 | x[i]=x[i+1] 39 | x[i+1] = temp 40 | sorted=false 41 | i+=2 42 | i=1 43 | while i x[i+1]: 45 | temp= x[i] 46 | x[i]=x[i+1] 47 | x[i+1] = temp 48 | sorted=false 49 | i+=2 50 | -------------------------------------------------------------------------------- /modules/math.py: -------------------------------------------------------------------------------- 1 | def pow(a,b): 2 | return a ^ b 3 | 4 | def pi(): 5 | return 3.141592 6 | 7 | def e(): 8 | return 2.718281 9 | 10 | def exp(x): 11 | return pow(e(), x) 12 | 13 | def sqrt(a): 14 | return native rtl_math(0, a) 15 | 16 | def sin(a): 17 | return native rtl_math(1, a) 18 | 19 | def cos(a): 20 | return native rtl_math(2, a) 21 | 22 | def tan(a): 23 | return native rtl_math(3, a) 24 | 25 | def asin(a): 26 | return native rtl_math(4, a) 27 | 28 | def acos(a): 29 | return native rtl_math(5, a) 30 | 31 | def atan(a): 32 | return native rtl_math(6, a) 33 | 34 | def sinh(a): 35 | return native rtl_math(7, a) 36 | 37 | def cosh(a): 38 | return native rtl_math(8, a) 39 | 40 | def 
tanh(a): 41 | return native rtl_math(9, a) 42 | 43 | def floor(a): 44 | return native rtl_math(10, a) 45 | 46 | def ceil(a): 47 | return native rtl_math(11, a) 48 | 49 | def log(a): 50 | return native rtl_math(12, a) 51 | 52 | def log10(a): 53 | return native rtl_math(13, a) 54 | -------------------------------------------------------------------------------- /host/makefile: -------------------------------------------------------------------------------- 1 | CFLAGS := -O3 -DHOST_INTERPRETER -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -std=c99 -I ../interpreter 2 | OBJECTS := lexer.o parser.o main.o memorymanager.o byteassembler.o stack.o misc.o configuration.o ../interpreter/interpreter.o host-functions.o python_interoperability.o 3 | 4 | LIBS=-lm -lpthread 5 | 6 | ifeq ($(STANDALONE),1) 7 | CFLAGS+= -DHOST_STANDALONE 8 | else 9 | CFLAGS+= -I../ -I ${EPIPHANY_HOME}/tools/host/include -D__HOST__ -Dasm=__asm__ -Drestrict= 10 | OBJECTS+=device-support.o 11 | CC=arm-linux-gnueabihf-gcc 12 | LDFLAGS=-L ${EPIPHANY_HOME}/tools/host/lib 13 | LIBS+=-le-hal -lrt 14 | ifneq (,$(wildcard ${EPIPHANY_HOME}/tools/host/lib/libe-loader.so)) 15 | LIBS+=-le-loader 16 | endif 17 | endif 18 | 19 | YFLAGS := -d 20 | LFLAGS := 21 | 22 | epython: $(OBJECTS) 23 | $(CC) $(LDFLAGS) -o epython-host $(OBJECTS) $(LIBS) 24 | 25 | full: lexer parser epython 26 | 27 | .PHONE: check 28 | 29 | %.o : %.c 30 | $(CC) $(CFLAGS) -MMD -o $@ -c $< 31 | 32 | lexer: 33 | $(LEX) $(LFLAGS) -o lexer.c epython.l 34 | 35 | parser: 36 | $(YACC) $(YFLAGS) -o parser.c epython.y 37 | 38 | clean: 39 | $(RM) -f -v $(bins) *.yy.[ch] *.tab.[ch] *.o *.d *.output 40 | 41 | -include *.d 42 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Nick Brown 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /host/stack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * stack.h 3 | * 4 | * Created on: 9 July 2015 5 | * Author: Nick Brown 6 | */ 7 | 8 | #ifndef HOST_STACK_H_ 9 | #define HOST_STACK_H_ 10 | 11 | #define INITIAL_STACK_SIZE 10 12 | 13 | struct stack_t { 14 | int size, width; 15 | char * type; 16 | void **data; 17 | }; 18 | 19 | struct identifier_exp { 20 | char * identifier; 21 | struct memorycontainer* exp; 22 | }; 23 | 24 | #include "byteassembler.h" 25 | 26 | struct stack_t* getNewStack(void); 27 | void clearStack(struct stack_t*); 28 | void initStack(struct stack_t*); 29 | int getStackSize(struct stack_t*); 30 | int pop(struct stack_t*); 31 | void push(struct stack_t*, int); 32 | char* popIdentifier(struct stack_t*); 33 | void pushIdentifier(struct stack_t*, char*); 34 | int peek(struct stack_t*); 35 | int getTopType(struct stack_t*); 36 | struct memorycontainer* popExpression(struct stack_t*); 37 | void pushExpression(struct stack_t*, struct memorycontainer*); 38 | void pushIdentifierAssgnExpression(struct stack_t*, char*, struct memorycontainer*); 39 | struct identifier_exp* popExpressionIdentifier(struct stack_t*); 40 | struct memorycontainer* getExpressionAt(struct stack_t*, int); 41 | struct identifier_exp* getExpressionIdentifierAt(struct stack_t*, int); 42 | char* getIdentifierAt(struct stack_t*, int); 43 | int getTypeAt(struct stack_t*, int); 44 | 45 | #endif /* HOST_STACK_H_ */ 46 | -------------------------------------------------------------------------------- /examples/jacobi_offload.py: -------------------------------------------------------------------------------- 1 | from epython import * 2 | import array 3 | import math 4 | 5 | data=None 6 | data_p1=None 7 | define_on_device(data) 8 | define_on_device(data_p1) 9 | 10 | MAX_ITS=10000 11 | 12 | @offload 13 | def initialise(global_size): 14 | 
num_local=global_size/(numcores()-1) 15 | if num_local * (numcores()-1) != global_size: 16 | if (coreid() < global_size-num_local*(numcores()-1)): num_local+=1 17 | data=[0.0]*(num_local+2) 18 | data_p1=[0.0]*(num_local+2) 19 | if coreid()==0: data[0]=1.0 20 | if coreid()==numcores()-2: data[num_local+1]=10.0 21 | 22 | @offload 23 | def calc_residual(): 24 | tmpnorm=0.0 25 | i=1 26 | while i<=len(data)-2: 27 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 28 | i+=1 29 | return tmpnorm 30 | 31 | @offload 32 | def jacobi_iteration(): 33 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 34 | if (coreid() < numcores()-2): data[len(data)-1]=sendrecv(data[len(data)-2], coreid()+1) 35 | i=1 36 | while i<=len(data)-2: 37 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 38 | i+=1 39 | # Swap data around for next iteration 40 | i=1 41 | while i<=len(data)-2: 42 | data[i]=data_p1[i] 43 | i+=1 44 | 45 | initialise(100) 46 | a=calc_residual() 47 | bnorm=math.sqrt(sum(a)) 48 | 49 | norm=1.0 50 | it=0 51 | 52 | while norm > 1e-4: 53 | jacobi_iteration() 54 | rn=calc_residual() 55 | norm=math.sqrt(sum(rn))/bnorm 56 | it+=1 57 | if it%50 == 0 : print "Rnorm is "+str(norm)+" after "+str(it)+" iterations" 58 | -------------------------------------------------------------------------------- /examples/pipeline.py: -------------------------------------------------------------------------------- 1 | from parallel import * 2 | from util import * 3 | from random import randrange 4 | from array import len 5 | 6 | sorting_size=100 7 | data=[0]*sorting_size 8 | 9 | if (coreid()==0): 10 | pipelineStageOne(10) 11 | elif (coreid()==1): 12 | pipelineStageTwo() 13 | elif (coreid()==2): 14 | pipelineStageThree() 15 | elif (coreid()==3): 16 | pipelineStageFour() 17 | 18 | def pipelineStageOne(num_items): 19 | for i in range(num_items): 20 | num=randrange(sorting_size-5) + 5 21 | send(num, coreid()+1) 22 | send(-1,coreid()+1) 23 | 24 | def pipelineStageTwo(): 25 | num=0 26 | while num >= 0: 27 
| num=recv(coreid()-1) 28 | if num > 0: 29 | i=0 30 | while i < num: 31 | data[i]=randrange(1000) 32 | i+=1 33 | send(num, coreid()+1) 34 | if num > 0: send(data, coreid()+1, num) 35 | 36 | def pipelineStageThree(): 37 | num=0 38 | while num >=0: 39 | num=recv(coreid()-1) 40 | if num > 0: 41 | data=recv(coreid()-1, num) 42 | oddSort(data) 43 | send(num, coreid()+1) 44 | if num > 0: send(data, coreid()+1, num) 45 | 46 | def pipelineStageFour(): 47 | num=0 48 | num_contig=0.0 49 | total_num=0 50 | while num >=0: 51 | num=recv(coreid()-1) 52 | if num > 0: 53 | total_num+=num 54 | data=recv(coreid()-1, num) 55 | cnum=data[0] 56 | ccount=1 57 | i=0 58 | while i < num: 59 | if (data[i] == cnum): 60 | ccount+=1 61 | else: 62 | num_contig+=ccount 63 | cnum=data[i] 64 | ccount=0 65 | i+=1 66 | chance=(num_contig/total_num)*100 67 | print chance+"% of numbers were contiguous" 68 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | prefix ?= /usr 2 | bindir = $(prefix)/bin 3 | includedir = $(prefix)/include 4 | 5 | all: epiphany 6 | 7 | standalone: clean 8 | @cd host; $(MAKE) epython STANDALONE=1 9 | @mv host/epython-host . 10 | 11 | standalone-full: clean 12 | @cd host; $(MAKE) full STANDALONE=1 13 | @mv host/epython-host . 14 | 15 | epiphany: clean host-build device-build 16 | 17 | full: clean host-full device-build 18 | 19 | host-build: 20 | @cd host; $(MAKE) epython 21 | @mv host/epython-host . 22 | 23 | host-full: 24 | @cd host; $(MAKE) full 25 | @mv host/epython-host . 26 | 27 | device-build: 28 | @cd device; $(MAKE) 29 | @mv device/epython-device.srec . 30 | @mv device/epython-device.elf . 
31 | 32 | clean: 33 | @cd interpreter; rm -f *.o *.d 34 | @cd host; $(MAKE) clean 35 | @cd device; $(MAKE) clean 36 | 37 | install: 38 | @mkdir -p $(DESTDIR)$(bindir) 39 | @cp epython-host epython-device.srec epython-device.elf $(DESTDIR)$(bindir) 40 | @cp epython.sh $(DESTDIR)$(bindir)/epython 41 | @mkdir -p $(DESTDIR)$(includedir)/epython 42 | @cp -R modules $(DESTDIR)$(includedir)/epython/. 43 | @echo 'export EPYTHONPATH=$$EPYTHONPATH:$(includedir)/epython/modules:$(shell pwd)' >> ~/.bashrc 44 | @echo 'export PYTHONPATH=$$PYTHONPATH:$(includedir)/epython/modules/fullpython' >> ~/.bashrc 45 | @echo "ePython installed, start a new bash session by executing bash before running ePython" 46 | 47 | uninstall: 48 | @rm $(DESTDIR)$(bindir)/epython-host 49 | @rm $(DESTDIR)$(bindir)/epython-device.srec 50 | @rm $(DESTDIR)$(bindir)/epython-device.elf 51 | @rm $(DESTDIR)$(bindir)/epython 52 | @rm $(DESTDIR)$(includedir)/epython/modules/*.py 53 | -------------------------------------------------------------------------------- /examples/parallel-odd-even-sort.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from parallel import * 3 | from random import randrange 4 | from array import len, array 5 | 6 | ln=25 7 | N=ln * numcores() 8 | x=[0]*ln 9 | other=[0]*ln 10 | i=0 11 | while i < ln: 12 | x[i]=randrange(1000) 13 | i+=1 14 | 15 | k=0 16 | while k <= numcores()-1: 17 | oddSort(x) 18 | partner=0 19 | if (k%2 == 0): 20 | if (coreid() % 2 == 0): 21 | partner=coreid()+1 22 | else: 23 | partner=coreid()-1 24 | else: 25 | if (coreid()%2 == 0): 26 | partner=coreid()-1 27 | else: 28 | partner=coreid()+1 29 | if (partner >= 0 and partner < numcores()): 30 | other=sendrecv(x, partner, ln) 31 | if coreid() < partner: 32 | swap_values(other, x) 33 | else: 34 | swap_values(x, other) 35 | k+=1 36 | 37 | for j in range(numcores()-1): 38 | if (j==coreid()): 39 | for i in range(ln-1): 40 | print x[i] 41 | sync() 42 | 43 | def 
swap_values(a, b): 44 | searching=true 45 | while searching: 46 | searching=false 47 | min_index=get_min_index(a) 48 | max_index=get_max_index(b) 49 | if (a[min_index] < b[max_index]): 50 | temp=a[min_index] 51 | a[min_index]=b[max_index] 52 | b[max_index]=temp 53 | searching=true 54 | 55 | def get_min_index(a): 56 | v=0 57 | j=-1 58 | i=0 59 | while i <= len(a)-1: 60 | if (j == -1 or v > a[i]): 61 | v=a[i] 62 | j=i 63 | i+=1 64 | return j 65 | 66 | def get_max_index(a): 67 | v=0 68 | j=-1 69 | i=0 70 | while i <= len(a)-1: 71 | if (j == -1 or v < a[i]): 72 | v=a[i] 73 | j=i 74 | i+=1 75 | return j -------------------------------------------------------------------------------- /modules/parallel.py: -------------------------------------------------------------------------------- 1 | def send(data, pid, n=none): 2 | if (n is none): 3 | native rtl_send(data,pid) 4 | else: 5 | i=0 6 | while i 13: matchingpid=1 26 | for i in range(1,13,3): 27 | send(-1,i) 28 | 29 | def pipelineStageTwo(): 30 | num=0 31 | while num >= 0: 32 | num=recv(0) 33 | if num > 0: 34 | i=0 35 | while i < num: 36 | data[i]=randrange(1000) 37 | i+=1 38 | send(num, coreid()+1) 39 | if num > 0: send(data, coreid()+1, num) 40 | 41 | def pipelineStageThree(): 42 | num=0 43 | while num >=0: 44 | num=recv(coreid()-1) 45 | if num > 0: 46 | data=recv(coreid()-1, num) 47 | oddSort(data, num) 48 | send(num, coreid()+1) 49 | if num > 0: send(data, coreid()+1, num) 50 | 51 | def pipelineStageFour(): 52 | num=0 53 | num_contig=0.0 54 | total_num=0 55 | while num >=0: 56 | num=recv(coreid()-1) 57 | if num > 0: 58 | total_num+=num 59 | data=recv(coreid()-1, num) 60 | cnum=data[0] 61 | ccount=1 62 | i=0 63 | while i < num: 64 | if (data[i] == cnum): 65 | ccount+=1 66 | else: 67 | num_contig+=ccount 68 | cnum=data[i] 69 | ccount=0 70 | i+=1 71 | chance=(num_contig/total_num)*100 72 | print chance+"% of numbers were contiguous" 73 | -------------------------------------------------------------------------------- 
/host/python_interoperability.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef PYTHONINTEROPERABILITY_H_ 28 | #define PYTHONINTEROPERABILITY_H_ 29 | 30 | #include "../shared.h" 31 | #include "configuration.h" 32 | #include 33 | 34 | void runFullPythonInteractivityOnHost(struct interpreterconfiguration*, struct shared_basic*, pthread_t*, char); 35 | 36 | #endif /* CONFIGURATION_H_ */ 37 | -------------------------------------------------------------------------------- /device/main.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef MAIN_H_ 28 | #define MAIN_H_ 29 | #include 30 | #include "shared.h" 31 | 32 | extern volatile e_barrier_t syncbarriers[TOTAL_CORES], collectivebarriers[TOTAL_CORES]; 33 | extern e_barrier_t *sync_tgt_bars[TOTAL_CORES], *collective_tgt_bars[TOTAL_CORES]; 34 | extern volatile struct shared_basic * sharedData; 35 | extern volatile unsigned char syncValues[TOTAL_CORES]; 36 | extern int myId, lowestCoreId; 37 | 38 | #endif /* MAIN_H_ */ 39 | -------------------------------------------------------------------------------- /host/misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef MISC_H_ 28 | #define MISC_H_ 29 | 30 | #define SQRT_MATHS_OP 0 31 | #define SIN_MATHS_OP 1 32 | #define COS_MATHS_OP 2 33 | #define TAN_MATHS_OP 3 34 | #define ASIN_MATHS_OP 4 35 | #define ACOS_MATHS_OP 5 36 | #define ATAN_MATHS_OP 6 37 | #define SINH_MATHS_OP 7 38 | #define COSH_MATHS_OP 8 39 | #define TANH_MATHS_OP 9 40 | #define FLOOR_MATHS_OP 10 41 | #define CEIL_MATHS_OP 11 42 | #define LOG_MATHS_OP 12 43 | #define LOG10_MATHS_OP 13 44 | #define RANDOM_MATHS_OP 14 45 | 46 | void errorCheck(int, char*); 47 | char* translateErrorCodeToMessage(unsigned char); 48 | 49 | #endif /* MISC_H_ */ 50 | -------------------------------------------------------------------------------- /host/configuration.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution.
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef CONFIGURATION_H_ 28 | #define CONFIGURATION_H_ 29 | 30 | #define VERSION_IDENT "2.0" 31 | 32 | // Configuration structure which is filled based upon command line arguments 33 | struct interpreterconfiguration { 34 | char * intentActive; 35 | char displayStats, displayTiming, forceCodeOnCore, forceCodeOnShared, forceDataOnShared, displayPPCode; 36 | char * filename, *compiledByteFilename, *loadByteFilename, *pipedInContents; 37 | int hostProcs, coreProcs, loadElf, loadSrec, fullPythonHost; 38 | }; 39 | 40 | struct interpreterconfiguration* readConfiguration(int, char *[]); 41 | 42 | #endif /* CONFIGURATION_H_ */ 43 | -------------------------------------------------------------------------------- /docs/installupgrade.md: -------------------------------------------------------------------------------- 1 | # Installing ePython 2 | 3 | To install ePython you need to checkout a version from the repository, build it and then install it. 
Firstly, log into your Parallella and issue 4 | 5 | ``` 6 | git clone https://github.com/mesham/epython.git 7 | ``` 8 | 9 | copy ePython onto your machine and a directory called *epython* will have been created. Next we are going to build this by issuing the make command, from the top level directory (the one you issued the git clone into) issue the following two commands: 10 | 11 | ``` 12 | cd epython 13 | make 14 | ``` 15 | 16 | The build process takes around 20-30 seconds and once complete you will be returned to the bash prompt. The next (and last) step is to install ePython to a central location by issuing the two commands below. You will be prompted for your user's password with the first command. The second command (bash) starts a new bash terminal which has the python paths correctly set and every time bash starts from now on it will be correctly configured for ePython. 17 | 18 | ``` 19 | sudo make install 20 | bash 21 | ``` 22 | 23 | Congratulations! You have installed ePython and are ready to start programming! 24 | 25 | # Upgrading ePython 26 | 27 | ePython is actively being developed, so new features and bug fixes are being added to the code. Because of this, and to ensure you can run the latest examples, it is useful to periodically ensure you have the latest ePython and if not upgrade your version. The process is similar to the installation process, ensure you are in the *epython* directory (you might need to issue *cd epython*) and then issue: 28 | 29 | ``` 30 | git pull 31 | ``` 32 | 33 | This will contact the ePython repository and download any updates. If you see the message *Already up-to-date.* then you already have the latest version and can stop here, if not then you have downloaded some updates and we need to build and install these via: 34 | 35 | ``` 36 | make 37 | sudo make install 38 | ``` 39 | 40 | This will replace the older version of ePython with this latest, newer version.
It is just the ePython interpreter that is being updated, so don't worry - all your own Python codes will remain unchanged and untouched by this. 41 | -------------------------------------------------------------------------------- /examples/jacobi.py: -------------------------------------------------------------------------------- 1 | /* 2 | Jacobi iteration to solve Laplace's equation for diffusion in one dimension 3 | This illustrates distributing data amongst the cores, halo swapping and reductions 4 | */ 5 | 6 | from parallel import * 7 | from math import sqrt 8 | 9 | DATA_SIZE=100 10 | MAX_ITS=10000 11 | 12 | # Work out the amount of data to hold on this core 13 | local_size=DATA_SIZE/numcores() 14 | if local_size * numcores() != DATA_SIZE: 15 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 16 | 17 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 18 | data=[0] * (local_size+2) 19 | data_p1=[0]* (local_size+2) 20 | 21 | # Set the initial conditions 22 | i=0 23 | while i<=local_size+1: 24 | data[i]=0.0 25 | i+=1 26 | 27 | if coreid()==0: data[0]=1.0 28 | if coreid()==numcores()-1: data[local_size+1]=10.0 29 | 30 | # Compute the initial absolute residual 31 | tmpnorm=0.0 32 | i=1 33 | while i<=local_size: 34 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 35 | i+=1 36 | tmpnorm=reduce(tmpnorm, "sum") 37 | bnorm=sqrt(tmpnorm) 38 | 39 | norm=1.0 40 | its=0 41 | while norm >= 1e-4 and its < MAX_ITS: 42 | # Halo swap to my left and right neighbours if I have them 43 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 44 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], coreid()+1) 45 | 46 | # Calculate current residual 47 | tmpnorm=0.0 48 | i=1 49 | while i<=local_size: 50 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 51 | i+=1 52 | tmpnorm=reduce(tmpnorm, "sum") 53 | norm=sqrt(tmpnorm)/bnorm 54 | 55 | if coreid()==0 and its%1000 == 0: print 
"RNorm is "+norm+" at "+its+" iterations" 56 | 57 | # Performs the Jacobi iteration for Laplace 58 | i=1 59 | while i<=local_size: 60 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 61 | i+=1 62 | # Swap data around for next iteration 63 | i=1 64 | while i<=local_size: 65 | data[i]=data_p1[i] 66 | i+=1 67 | its+=1 68 | 69 | if coreid()==0: print "Completed in "+str(its)+" iterations, RNorm="+str(norm) -------------------------------------------------------------------------------- /examples/gauss-seidel.py: -------------------------------------------------------------------------------- 1 | /* 2 | Red-black Gauss Seidel with SOR to solve Laplace's equation for diffusion in one dimension 3 | This illustrates distributing data amongst the cores, halo swapping and reductions 4 | */ 5 | 6 | from parallel import * 7 | from math import sqrt 8 | 9 | DATA_SIZE=100 10 | MAX_ITS=10000 11 | W=1.3 # Overrelaxing factor (between 1 and 2) 12 | 13 | # Work out the amount of data to hold on this core 14 | local_size=DATA_SIZE/numcores() 15 | if local_size * numcores() != DATA_SIZE: 16 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 17 | 18 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 19 | data=[0]*(local_size+2) 20 | 21 | # Set the initial conditions 22 | i=0 23 | while i<=local_size+1: 24 | data[i]=0.0 25 | i+=1 26 | 27 | if coreid()==0: data[0]=1.0 28 | if coreid()==numcores()-1: data[local_size+1]=10.0 29 | 30 | # Compute the initial absolute residual 31 | tmpnorm=0.0 32 | i=1 33 | while i<=local_size: 34 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 35 | i+=1 36 | tmpnorm=reduce(tmpnorm, "sum") 37 | bnorm=sqrt(tmpnorm) 38 | norm=1.0 39 | its=0 40 | while norm >= 1e-4 and its < MAX_ITS: 41 | # Halo swap to my left and right neighbours if I have them 42 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 43 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], 
coreid()+1) 44 | 45 | # Calculate current residual 46 | tmpnorm=0.0 47 | i=1 48 | while i<=local_size: 49 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 50 | i+=1 51 | tmpnorm=reduce(tmpnorm, "sum") 52 | norm=sqrt(tmpnorm)/bnorm 53 | if coreid()==0 and its%1000 == 0: print "RNorm is "+str(norm)+" at "+str(its)+" iterations" 54 | j=0 55 | while j<2: 56 | if (j==1): 57 | i=1 58 | else: 59 | i=2 60 | while i<=local_size: 61 | data[i]=((1-W) * data[i]) + 0.5 * W * (data[i-1]+data[i+1]) 62 | i+=2 63 | j+=1 64 | its+=1 65 | 66 | if coreid()==0: print "Completed in "+str(its)+" iterations, RNorm="+str(norm) -------------------------------------------------------------------------------- /modules/array.py: -------------------------------------------------------------------------------- 1 | def array(a,b=none,c=none,d=none,e=none,f=none,g=none): 2 | if (b is none): 3 | return native rtl_allocatearray(a) 4 | elif (c is none): 5 | return native rtl_allocatearray(a,b) 6 | elif (d is none): 7 | return native rtl_allocatearray(a,b,c) 8 | elif (e is none): 9 | return native rtl_allocatearray(a,b,c,d) 10 | elif (f is none): 11 | return native rtl_allocatearray(a,b,c,d,e) 12 | elif (g is none): 13 | return native rtl_allocatearray(a,b,c,d,e,f) 14 | else: 15 | return native rtl_allocatearray(a,b,c,d,e,f,g) 16 | 17 | def shared_mem_array(a,b=none,c=none,d=none,e=none,f=none,g=none): 18 | if (b is none): 19 | return native rtl_allocatesharedarray(a) 20 | elif (c is none): 21 | return native rtl_allocatesharedarray(a,b) 22 | elif (d is none): 23 | return native rtl_allocatesharedarray(a,b,c) 24 | elif (e is none): 25 | return native rtl_allocatesharedarray(a,b,c,d) 26 | elif (f is none): 27 | return native rtl_allocatesharedarray(a,b,c,d,e) 28 | elif (g is none): 29 | return native rtl_allocatesharedarray(a,b,c,d,e,f) 30 | else: 31 | return native rtl_allocatesharedarray(a,b,c,d,e,f,g) 32 | 33 | def flatten(arr): 34 | native rtl_flatten(arr, size(arr)) 35 | return arr 36 | 37 | 
def arraycopy(target, source): 38 | if (len(target) != len(source)): 39 | print "Error, array copy overall sizes must match" 40 | exit() 41 | else: 42 | native rtl_arraycopy(target, source, ndim(target), ndim(source), len(target)) 43 | 44 | def size(arr): 45 | dims=ndim(arr) 46 | if dims > 0: 47 | s=shape(arr) 48 | arraylength=1 49 | i=0 50 | while i 0): 58 | send(arg, pid, datalen) 59 | else: 60 | send(arg, pid) 61 | 62 | def worker(): 63 | num_args=recv(_masterTask) 64 | while num_args >= 0: 65 | op=recv(_masterTask) 66 | retVal=none 67 | if (num_args == 0): retVal=op() 68 | if (num_args == 1): retVal=op(recvArgument(_masterTask)) 69 | if (num_args == 2): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask)) 70 | if (num_args == 3): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 71 | if (num_args == 4): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 72 | if (num_args == 5): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 73 | if (retVal is none): 74 | send(-1, _masterTask) 75 | else: 76 | datalen=len(retVal) 77 | send(datalen, _masterTask) 78 | if (datalen==0): 79 | send(retVal, _masterTask) 80 | else: 81 | send(retVal, _masterTask, datalen) 82 | num_args=recv(_masterTask) 83 | 84 | def recvArgument(pid): 85 | length=recv(pid) 86 | if (length==0): 87 | return recv(pid) 88 | else: 89 | return recv(pid, length) 90 | -------------------------------------------------------------------------------- /examples/mergesort.py: -------------------------------------------------------------------------------- 1 | /* 2 | Parallel mergesort using divide and conquer. 
An unsorted random list of numbers is generated on core 0, then each core will split the data until 3 | there are no more cores left, then each will sequentially solve its base case using a bubblesort. The sorted results on each core are then merged 4 | back together and core 0 will display the sorted list. Whilst the sequential (bubblesort) algorithm is inefficient, this illustrates the general 5 | concept and could be swapped out for something better such as quicksort if desired. 6 | */ 7 | 8 | from util import * 9 | from parallel import * 10 | from random import randrange 11 | from array import array 12 | 13 | na=128 14 | data=array(na) 15 | 16 | if coreid()==0: 17 | populateData(data, na) 18 | sort(data, 0, na) 19 | displayData(data, na) 20 | else: 21 | level=getlevel() 22 | pid=getparentId(level) 23 | dsize=na/(2^level) 24 | d=recv(pid, dsize) 25 | sort(d, level, dsize) 26 | send(d, coreid() - (2^(4-level)), dsize) 27 | 28 | def sort(d, level, thissize): 29 | if level == 4: 30 | bubblesort(d, thissize) 31 | else: 32 | pivot=thissize/2 33 | cid=coreid() + (2^(3-level)) 34 | send(d, cid, pivot) 35 | split=array(thissize-pivot) 36 | for x in range(pivot, thissize-1): 37 | split[x-pivot]=d[x] 38 | sort(split,level+1, thissize-pivot) 39 | ssplit=recv(cid, pivot) 40 | merge(d, split, ssplit, pivot, thissize) 41 | 42 | def merge(target, split, ssplit, pivot, length): 43 | i=0 44 | pre_index=0 45 | post_index=0 46 | while i= pivot: 48 | target[i]=ssplit[post_index] 49 | post_index=post_index+1 50 | elif post_index >=length-pivot: 51 | target[i]=split[pre_index] 52 | pre_index=pre_index+1 53 | elif split[pre_index] < ssplit[post_index]: 54 | target[i]=split[pre_index] 55 | pre_index=pre_index+1 56 | else: 57 | target[i]=ssplit[post_index] 58 | post_index=post_index+1 59 | i=i+1 60 | 61 | def bubblesort(d,size): 62 | i=0 63 | while i d[i]: 67 | temp=d[i] 68 | d[i]=d[j] 69 | d[j]=temp 70 | j=j+1 71 | i=i+1 72 | 73 | 74 | def populateData(d, l): 75 | i=0 76 | while i 1: 
90 | if isdivbyn(coreid(), cc): return i 91 | cc=cc/2 92 | i=i+1 93 | return i 94 | 95 | def getparentId(level): 96 | cc=numcores() 97 | for x in range(1,level): 98 | cc=cc/2 99 | return coreid()-cc 100 | 101 | def isdivbyn(a,b): 102 | if (a/b)*b==a: 103 | return true 104 | else: 105 | return false 106 | 107 | -------------------------------------------------------------------------------- /interpreter/functions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef FUNCTIONS_H_ 28 | #define FUNCTIONS_H_ 29 | 30 | #include "interpreter.h" 31 | 32 | /* 33 | * These functions are implemented by the device and host to support running in normal parallel core 34 | * Epiphany mode, and also standalone host mode only which is useful for interpreter development/testing 35 | */ 36 | 37 | #ifdef HOST_INTERPRETER 38 | void callNativeFunction(struct value_defn*, unsigned char, int, struct value_defn*,int,int,int,struct symbol_node*,int); 39 | char* getHeapMemory(int,char,int); 40 | void freeMemoryInHeap(void*,int); 41 | void syncCores(int, int); 42 | struct value_defn performStringConcatenation(struct value_defn, struct value_defn, int); 43 | #else 44 | void callNativeFunction(struct value_defn*, unsigned char, int, struct value_defn*, int, int, int, struct symbol_node*); 45 | char* getHeapMemory(int,char,int,struct symbol_node*); 46 | void freeMemoryInHeap(void*); 47 | void syncCores(int); 48 | struct value_defn performStringConcatenation(struct value_defn, struct value_defn, int, struct symbol_node*); 49 | #endif 50 | int checkStringEquality(struct value_defn, struct value_defn); 51 | struct symbol_node* initialiseSymbolTable(int); 52 | char* getStackMemory(int,char); 53 | void clearFreedStackFrames(char*); 54 | void cpy(volatile void*, volatile void *, unsigned int); 55 | void raiseError(unsigned char); 56 | int slength(char*); 57 | int getInt(void*); 58 | float getFloat(void*); 59 | 60 | #endif /* FUNCTIONS_H_ */ 61 | -------------------------------------------------------------------------------- /host/memorymanager.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef MEMORYMANAGER_H_ 28 | #define MEMORYMANAGER_H_ 29 | 30 | #include "byteassembler.h" 31 | 32 | // Used to maintain a linked list of functions 33 | struct functionListNode { 34 | struct functionDefinition * fn; 35 | struct functionListNode * next; 36 | }; 37 | 38 | struct exportableFunctionTableNode { 39 | char * functionName; 40 | unsigned short functionLocation; 41 | struct exportableFunctionTableNode * next; 42 | }; 43 | 44 | extern struct exportableFunctionTableNode* exportableFunctionTable; 45 | extern int numberExportableFunctionsInTable; 46 | 47 | int getNumberOfSymbolEntriesNotUsed(void); 48 | void addFunction(struct functionDefinition*); 49 | int getNumberSymbolTableEntriesForRecursion(void); 50 | void compileMemory(struct memorycontainer*); 51 | struct memorycontainer* concatenateMemory(struct memorycontainer*, struct memorycontainer*); 52 | struct memorycontainer* cloneMemory(struct memorycontainer*); 53 | unsigned int appendStatement(struct memorycontainer*, unsigned char, unsigned int); 54 | unsigned int appendMemory(struct memorycontainer*, struct memorycontainer*, unsigned int); 55 | unsigned int appendVariable(struct memorycontainer*, unsigned short, unsigned int); 56 | unsigned int getMemoryFilledSize(void); 57 | void setMemoryFilledSize(unsigned int); 58 | char * getAssembledCode(void); 59 | void setAssembledCode(char*); 60 | 61 | extern struct function_call_tree_node mainCodeCallTree; 62 | 63 | #endif /* MEMORYMANAGER_H_ */ 64 | -------------------------------------------------------------------------------- /interpreter/interpreter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef INTERPRETER_H_ 28 | #define INTERPRETER_H_ 29 | 30 | #include "../shared.h" 31 | 32 | #define INT_TYPE 0 33 | #define REAL_TYPE 1 34 | #define STRING_TYPE 2 35 | #define BOOLEAN_TYPE 3 36 | #define NONE_TYPE 4 37 | #define FN_ADDR_TYPE 5 38 | 39 | #define INT_PTR_TYPE 100 40 | #define REAL_PTR_TYPE 101 41 | #define STRING_PTR_TYPE 102 42 | #define BOOLEAN_PTR_TYPE 103 43 | #define NONE_PTR_TYPE 104 44 | #define FN_ADDR_PTR_TYPE 105 45 | 46 | #define SCALAR 0 47 | #define ARRAY 1 48 | 49 | #define UNALLOCATED 1 50 | #define ALLOCATED 2 51 | #define ALIAS 3 52 | 53 | // The value in a symbol table; its type and data (which is integer/real or pointer to string 54 | // or array.) In host mode this is 8 bytes as often pointers are 64bit, but on Epiphany only 4 byte as 32 bit pointers 55 | struct value_defn { 56 | char type, dtype; 57 | #ifdef HOST_STANDALONE 58 | char data[8]; 59 | #else 60 | char data[4]; 61 | #endif 62 | }; 63 | 64 | // A node in the symbol table - its id and value 65 | struct symbol_node { 66 | unsigned short id, alias; 67 | unsigned char state, level; 68 | struct value_defn value __attribute__((aligned(8))); 69 | }; 70 | 71 | #ifdef HOST_INTERPRETER 72 | extern volatile char * stopInterpreter; 73 | void runIntepreter(char*, unsigned int, unsigned short, int, int, int); 74 | void initThreadedAspectsForInterpreter(int, int, struct shared_basic*); 75 | #else 76 | extern char stopInterpreter; 77 | void runIntepreter(char*, unsigned int, unsigned short, int, int, int); 78 | #endif 79 | #endif /* INTERPRETER_H_ */ 80 | -------------------------------------------------------------------------------- /examples/parallel_pipeline.py: -------------------------------------------------------------------------------- 1 | from parallel import * 2 | from util import * 3 | from random import randrange 4 | from array import len, array 5 | 6 | sorting_size=100 7 | data=[0]*sorting_size 8 | 9 | if (coreid()==0): 10 | pipelineStageOne(10) 11 | 
elif (coreid()==1): 12 | pipelineStageTwo() 13 | elif (coreid() >= 2 and coreid() <= 14): 14 | pipelineStageThree() 15 | elif (coreid()==15): 16 | pipelineStageFour() 17 | 18 | def pipelineStageOne(num_items): 19 | for i in range(num_items): 20 | num=randrange(sorting_size-5) + 5 21 | num+=num % 13 22 | send(num, coreid()+1) 23 | send(-1,coreid()+1) 24 | 25 | def pipelineStageTwo(): 26 | num=0 27 | while num >= 0: 28 | num=recv(coreid()-1) 29 | j=2 30 | while j<=14: 31 | if num > 0: 32 | i=0 33 | while i < num/13: 34 | data[i]=randrange(1000) 35 | i+=1 36 | send(num/13, j) 37 | send(data, j, num/13) 38 | else: 39 | send(-1, j) 40 | j+=1 41 | 42 | def pipelineStageThree(): 43 | num=0 44 | while num >=0: 45 | num=recv(1) 46 | if num > 0: 47 | data=recv(1, num) 48 | parallel_odd_even_sort(num) 49 | send(num, 15) 50 | if num > 0: send(data, 15, num) 51 | 52 | def pipelineStageFour(): 53 | rdata=array(100) 54 | num=0 55 | num_contig=0.0 56 | total_num=0 57 | while num >=0: 58 | i=2 59 | while i<=14: 60 | num=recv(i) 61 | if (num > 0): 62 | rdata=recv(i, num) 63 | j=num*i 64 | while j 0: 69 | num*=13 70 | total_num+=num 71 | cnum=data[0] 72 | ccount=0 73 | i=0 74 | while i < num: 75 | if (data[i] == cnum): 76 | ccount+=1 77 | else: 78 | num_contig+=ccount 79 | cnum=data[i] 80 | ccount=0 81 | i+=1 82 | chance=(num_contig/total_num)*100 83 | print chance+"% of numbers were contiguous" 84 | 85 | def parallel_odd_even_sort(ln): 86 | other=array(ln) 87 | i=2 88 | while i <= 14: 89 | oddSort(data,ln) 90 | partner=0 91 | if (i%2 == 0): 92 | if (coreid() % 2 == 0): 93 | partner=coreid()+1 94 | else: 95 | partner=coreid()-1 96 | else: 97 | if (coreid()%2 == 0): 98 | partner=coreid()-1 99 | else: 100 | partner=coreid()+1 101 | if (partner >= 2 and partner <= 14): 102 | other=sendrecv(data, partner, ln) 103 | if coreid() < partner: 104 | swap_values(other, data) 105 | else: 106 | swap_values(data, other) 107 | i+=1 108 | 109 | def swap_values(a, b): 110 | searching=true 111 | while 
searching: 112 | searching=false 113 | min_index=get_min_index(a) 114 | max_index=get_max_index(b) 115 | if (a[min_index] < b[max_index]): 116 | temp=a[min_index] 117 | a[min_index]=b[max_index] 118 | b[max_index]=temp 119 | searching=true 120 | 121 | def get_min_index(a): 122 | v=0 123 | j=-1 124 | i=0 125 | while i < len(a): 126 | if (j == -1 or v > a[i]): 127 | v=a[i] 128 | j=i 129 | i+=1 130 | return j 131 | 132 | def get_max_index(a): 133 | v=0 134 | j=-1 135 | i=0 136 | while i < len(a): 137 | if (j == -1 or v < a[i]): 138 | v=a[i] 139 | j=i 140 | i+=1 141 | return j 142 | -------------------------------------------------------------------------------- /device/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include "shared.h" 29 | #include "interpreter.h" 30 | #include "main.h" 31 | #include "functions.h" 32 | 33 | volatile e_barrier_t syncbarriers[TOTAL_CORES], collectivebarriers[TOTAL_CORES]; 34 | e_barrier_t *sync_tgt_bars[TOTAL_CORES], *collective_tgt_bars[TOTAL_CORES]; 35 | volatile unsigned char syncValues[TOTAL_CORES]; 36 | volatile struct shared_basic * sharedData; 37 | int myId, lowestCoreId; 38 | 39 | static void init_barrier(volatile e_barrier_t[], e_barrier_t *[]); 40 | 41 | /** 42 | * Core entry point, sets the stuff up and then runs the interpreter 43 | */ 44 | int main() { 45 | myId=e_group_config.core_row * e_group_config.group_cols + e_group_config.core_col; 46 | sharedData=(void*) (e_emem_config.base + EXTERNAL_MEM_ABSOLUTE_START); 47 | 48 | while (sharedData->core_ctrl[myId].core_run == 0) {}; 49 | sharedData->core_ctrl[myId].core_busy=1; 50 | sharedData->core_ctrl[myId].core_run=1; 51 | 52 | int i; 53 | lowestCoreId=TOTAL_CORES; 54 | for (i=0;icore_ctrl[i].active) { 57 | if (i< lowestCoreId) lowestCoreId=i; 58 | } 59 | } 60 | 61 | init_barrier(syncbarriers, sync_tgt_bars); 62 | init_barrier(collectivebarriers, collective_tgt_bars); 63 | 64 | if (sharedData->codeOnCores) { 65 | cpy(sharedData->edata, sharedData->esdata, sharedData->length); 66 | } 67 | 68 | syncCores(0); 69 | runIntepreter(sharedData->edata, sharedData->length, sharedData->symbol_size, myId, 
sharedData->num_procs, sharedData->baseHostPid); 70 | sharedData->core_ctrl[myId].core_busy=0; 71 | sharedData->core_ctrl[myId].core_run=0; 72 | return 0; 73 | } 74 | 75 | /** 76 | * Initialises an Epiphany barrier, this is based upon the version in elib, but works when core 0 is not in use 77 | * and over a subset of cores 78 | */ 79 | static void init_barrier(volatile e_barrier_t barrier_array[], e_barrier_t * target_barrier_array[]) { 80 | int i, row, col; 81 | for (i=0; iNick Brown and is [licenced](LICENCE) under BSD-2. 5 | 6 | ## Installation 7 | ePython comes pre-installed with the latest Parallella Linux image so it should be all set up and ready to go as soon as you switch the machine on. If you do want to manually install ePython then execute *make* and then *sudo make install* at the command line. Importantly you will then need to start a new bash session (either log out and log back in, or execute *bash* at the command line. 8 | 9 | For more information about installing ePython refer [here](docs/tutorial1.md), for upgrading ePython refer [here](docs/installupgrade.md) 10 | 11 | ## Hello world 12 | Create a file called hello.py: 13 | 14 | ```python 15 | print "Hello world" 16 | ``` 17 | 18 | Now execute *epython hello.py* , each core will display the Hello world message on the screen. This is an example of running code directly on the Epiphany cores and more information can be found [here](docs/tutorial1.md) 19 | 20 | You can also use ePython to offload kernels to the Epiphany and use it as an accelerator. For instance create a file called hello2.py: 21 | 22 | ```python 23 | from epython import offload 24 | 25 | @offload 26 | def helloworld(): 27 | print "Hello World" 28 | 29 | helloworld() 30 | ``` 31 | 32 | Execute *python hello2.py* and again you will see the Hello world message on the screen. 
This is very different from the previous example, because the code is running via CPython on the host and simply offloading this function (*helloworld*) to each Epiphany core. If you comment out the *offload* directive and rerun you will see the host display the message instead. Take a look at [this tutorial](docs/tutorial6.md) for more information and examples around offloading. 33 | 34 | ## Troubleshooting 35 | 36 | Often these are set by default, but if it complains that it can not find e-gcc or the libraries, then you will need to set these environment variables: 37 | 38 | export PATH=/opt/adapteva/esdk/tools/e-gnu/bin:$PATH 39 | export EPIPHANY_HOME=/opt/adapteva/esdk 40 | 41 | (you might want to place this in your .bashrc file) 42 | 43 | ## More advanced installation 44 | 45 | If you do not install ePython then you can still run epython from the current directory, as ./epython.sh but ensure that epython-device.elf is in the current directory when you run the interpreter. The epython.sh script will detect whether to run as sudo (earlier versions of the parallella OS) or not (later versions.) 46 | 47 | In order to include files (required for parallel functions) you must either run your Python codes in the same directory as the executables (and the modules directory) and/or export the EPYTHONPATH environment variable to point to the modules directory. When including files, by default ePython will search in the current directory, any subdirectory called modules and then the EPYTHONPATH variable, which follows the same syntax as the PATH variable. 48 | 49 | Issuing export EPYTHONPATH=$EPYTHONPATH:`pwd`/modules in the epython directory will set this to point to the current directory. You can also modify your ~/.bashrc file to contain a similar command. 
For offload support you will need to export PYTHONPATH=$PYTHONPATH:`pwd`/modules/fullpython 50 | 51 | ## Rebuilding the parser/lexer 52 | To rebuild the parser and lexer too, then execute *make full* 53 | 54 | ## SREC and ELF 55 | 56 | The device executable is built in both SREC and ELF format, as of 2016 the loading of SREC on the Epiphany is deprecated and will be removed from later SDK releases. You can choose which to load via the -elf and -srec command line arguments. ELF is the default for ePython, apart from old Epiphany SDK versions which support SREC. 57 | -------------------------------------------------------------------------------- /interpreter/basictokens.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef BASICTOKENS_H_ 28 | #define BASICTOKENS_H_ 29 | 30 | #define LET_TOKEN 0x00 31 | #define STOP_TOKEN 0x01 32 | #define OR_TOKEN 0x02 33 | #define AND_TOKEN 0x03 34 | #define EQ_TOKEN 0x04 35 | #define NEQ_TOKEN 0x05 36 | #define LT_TOKEN 0x06 37 | #define GT_TOKEN 0x07 38 | #define LEQ_TOKEN 0x08 39 | #define GEQ_TOKEN 0x09 40 | #define ADD_TOKEN 0x0A 41 | #define SUB_TOKEN 0x0B 42 | #define MUL_TOKEN 0x0C 43 | #define DIV_TOKEN 0x0D 44 | #define MOD_TOKEN 0x0E 45 | #define IDENTIFIER_TOKEN 0x0F 46 | #define REAL_TOKEN 0x10 47 | #define STRING_TOKEN 0x11 48 | #define INTEGER_TOKEN 0x12 49 | #define IF_TOKEN 0x13 50 | #define FOR_TOKEN 0x14 51 | #define GOTO_TOKEN 0x15 52 | #define ARRAYACCESS_TOKEN 0x16 53 | #define ARRAYSET_TOKEN 0x17 54 | #define IFELSE_TOKEN 0x18 55 | #define POW_TOKEN 0x19 56 | #define RETURN_TOKEN 0x1A 57 | #define FNCALL_TOKEN 0x1B 58 | #define RETURN_EXP_TOKEN 0x1C 59 | #define BOOLEAN_TOKEN 0x1D 60 | #define LETNOALIAS_TOKEN 0x1E 61 | #define NONE_TOKEN 0x1F 62 | #define IS_TOKEN 0x20 63 | #define ARRAY_TOKEN 0x21 64 | #define NOT_TOKEN 0x22 65 | #define NATIVE_TOKEN 0x23 66 | #define FN_ADDR_TOKEN 0x24 67 | #define FNCALL_BY_VAR_TOKEN 0x25 68 | #define REFERENCE_TOKEN 0x26 69 | #define SYMBOL_TOKEN 0x27 70 | #define ALIAS_TOKEN 0x28 71 | 72 | #define ERR_STR_ONLYTEST_EQ 0x00 73 | #define ERR_NONE_ONLYTEST_EQ 0x01 74 | #define ERR_ONLY_ADDITION_STR 0x02 75 | #define 
ERR_TOO_MANY_ARR_INDEX 0x03 76 | #define ERR_NEG_ARR_INDEX 0x04 77 | #define ERR_ARR_INDEX_EXCEED_SIZE 0x05 78 | #define ERR_ONLY_DISPLAY_STR_WITH_INPUT 0x06 79 | #define ERR_OUT_OF_SHARED_HEAP_MEM 0x07 80 | #define ERR_OUT_OF_CORE_SHARED_HEAP_MEM 0x08 81 | #define ERR_OUT_OF_SHARED_STACK_MEM 0x09 82 | #define ERR_OUT_OF_CORE_SHARED_STACK_MEM 0x0A 83 | #define ERR_ONLY_SEND_INT_AND_REAL 0x0B 84 | #define ERR_SEND_TO_UNKNOWN_CORE 0x0C 85 | #define ERR_SEND_TO_INACTIVE_CORE 0x0D 86 | #define ERR_RECV_FROM_UNKNOWN_CORE 0x0E 87 | #define ERR_RECV_FROM_INACTIVE_CORE 0x0F 88 | #define ERR_SENDRECV_WITH_UNKNOWN_CORE 0x10 89 | #define ERR_FREE_ON_NON_HEAP 0x11 90 | #define ERR_INCORRECT_NUM_NATIVE_PARAMS 0x12 91 | #define ERR_UNKNOWN_NATIVE_COMMAND 0x13 92 | #define ERR_FNCALL_VAR_NOT_CONTAINING_FN_PTR 0x14 93 | #define ERR_PROBE_NOT_SUPPORTED 0x15 94 | #define ERR_NBSEND_NOT_SUPPORTED 0x16 95 | 96 | #define NATIVE_FN_RTL_ISHOST 0x00 97 | #define NATIVE_FN_RTL_ISDEVICE 0x01 98 | #define NATIVE_FN_RTL_PRINT 0x02 99 | #define NATIVE_FN_RTL_NUMDIMS 0x03 100 | #define NATIVE_FN_RTL_DSIZE 0x04 101 | #define NATIVE_FN_RTL_INPUT 0x05 102 | #define NATIVE_FN_RTL_INPUTPRINT 0x06 103 | #define NATIVE_FN_RTL_SYNC 0x07 104 | #define NATIVE_FN_RTL_GC 0x08 105 | #define NATIVE_FN_RTL_FREE 0x09 106 | #define NATIVE_FN_RTL_SEND 0x0A 107 | #define NATIVE_FN_RTL_RECV 0x0B 108 | #define NATIVE_FN_RTL_SENDRECV 0x0C 109 | #define NATIVE_FN_RTL_BCAST 0x0D 110 | #define NATIVE_FN_RTL_NUMCORES 0x0E 111 | #define NATIVE_FN_RTL_COREID 0x0F 112 | #define NATIVE_FN_RTL_REDUCE 0x11 113 | #define NATIVE_FN_RTL_ALLOCARRAY 0x12 114 | #define NATIVE_FN_RTL_ALLOCSHAREDARRAY 0x13 115 | #define NATIVE_FN_RTL_MATH 0x14 116 | #define NATIVE_FN_RTL_PROBE_FOR_MESSAGE 0x15 117 | #define NATIVE_FN_RTL_TEST_FOR_SEND 0x16 118 | #define NATIVE_FN_RTL_WAIT_FOR_SEND 0x17 119 | #define NATIVE_FN_RTL_SEND_NB 0x18 120 | #define NATIVE_FN_RTL_GLOBAL_REFERENCE 0x19 121 | #define NATIVE_FN_RTL_DEREFERENCE 0x1A 122 | #define 
NATIVE_FN_RTL_FLATTEN 0x1B 123 | #define NATIVE_FN_RTL_ARRAYCOPY 0x1C 124 | 125 | #endif /* BASICTOKENS_H_ */ 126 | -------------------------------------------------------------------------------- /docs/tutorial1.md: -------------------------------------------------------------------------------- 1 | # Installing and getting to grips with ePython 2 | 3 | Programming the Epiphany chip is actually very simple, and in this walk through we will be using an Epiphany version of the Python programming language (ePython) as our technology. Using ePython you can go from being a complete novice to writing and running your own code on the Epiphany co-processor in 60 seconds. This walk through is intended as an introductory guide, and we will initially discuss installation & configuration of ePython, before looking at some code examples which you can then modify to further explore the concepts. 4 | 5 | ### Installing ePython 6 | ePython, our version of Python, is open source and available at GitHub. You only need to complete these install commands once, once ePython is installed it can be used as many times as you like. Log into your Parallella board as usual and issue the command 7 | 8 | ``` 9 | git clone https://github.com/mesham/epython.git 10 | ``` 11 | 12 | This will copy ePython onto your machine and a directory called epython will have been created. Next we are going to build this by issuing the make command, from the top level directory (the one you issued the git clone into) issue the following two commands: 13 | 14 | ``` 15 | cd epython 16 | make 17 | ``` 18 | 19 | The build process takes around 20-30 seconds and once complete you will be returned to the bash prompt. The next (and last) step is to install ePython to a central location by issuing the two commands below. You will be prompted for your user's password with the first command. 
The second command (bash) starts a new bash terminal which has the python paths correctly set and every time bash starts from now on it will be correctly configured for ePython. 20 | 21 | ``` 22 | sudo make install 23 | bash 24 | ``` 25 | 26 | Congratulations! You have installed ePython and are ready to start programming! 27 | 28 | ### Let's get coding! 29 | 30 | Open a text editor and enter the following code, then save this file as hello.py 31 | 32 | ```python 33 | print "Hello world" 34 | ``` 35 | 36 | Now issue *epython hello.py* and each Epiphany core will display the message "Hello world", along with the ID of that specific core. Well done - you have just run your first program on the Epiphany co-processor, so let's start exploring some more! The *parallel* package provides a number of functions which are useful for parallel codes. We are going to look at the *coreid* and *numcores* functions. Using the text editor, put the following code into your source file *hello.py* and reissue *epython hello.py* 37 | 38 | ```python 39 | import parallel 40 | print "Hello world from core "+coreid()+" out of "+numcores()+" cores" 41 | ``` 42 | 43 | Line one will import the parallel functions (of which *coreid* and *numcores* are members.) Line two then displays a similar message from each core as the first example, but also includes the ID of each core and total number of cores in the output. We don't have to use all Epiphany cores, one can set the number of cores via the *-d* command line argument, for instance *epython -d 5 hello.py* will only run over five Epiphany cores (you should not select a number greater than the number of physical cores.) 44 | 45 | ### You're doing great, let's look at something a bit more advanced 46 | 47 | We have been printing out information about the cores, but we can also use this in other ways too. The first code example in this section will display an even or odd message depending upon the core's id. 
48 | 49 | ```python 50 | import parallel 51 | if coreid() % 2 == 0: 52 | print "Even core" 53 | else: 54 | print "Odd core" 55 | ``` 56 | 57 | Now we are going to put this all together and produce a slightly more complex example. In the following code the first core will request a number from the user (this can be an integer or float.) The *bcast* function is then called (part of the parallel package) to broadcast this number from the root process (in this case 0, the second argument to the *bcast* call) to all other cores. Each core then displays the number it has just received. 58 | 59 | ```python 60 | import parallel 61 | a=0 62 | if coreid()==0: 63 | a=input("Enter a number: ") 64 | a=bcast(a,0) 65 | print "Number is "+a 66 | ``` 67 | 68 | This is an example of a collective communication, where each process collectively works together to produce some final value. Collective communications form a major corner stone of parallel programming and broadcasting values between processes (or Epiphany cores in this case) is a fundamental aspect of many parallel codes running on modern supercomputers. This topic is covered in more depth in the second tutorial. 69 | 70 | ### Summary 71 | 72 | In this walk through we have installed Epiphany Python and then run a few simple, introductory examples to illustrate running codes on the Epiphany co-processor. We have just scratched the surface here and as you can probably imagine, there is far more to explore. ePython comes with complete documentation (doc folder) along with a number of code examples which you can play with. 
73 | -------------------------------------------------------------------------------- /host/stack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * stack.c 3 | * 4 | * Created on: 9 July 2015 5 | * Author: Nick Brown 6 | */ 7 | 8 | #include 9 | #include 10 | #include "stack.h" 11 | 12 | struct stack_t* getNewStack(void) { 13 | struct stack_t* newStack=(struct stack_t*) malloc(sizeof(struct stack_t)); 14 | initStack(newStack); 15 | return newStack; 16 | } 17 | 18 | void initStack(struct stack_t* stack) { 19 | stack->width=INITIAL_STACK_SIZE; 20 | stack->size=0; 21 | stack->data=(void**) malloc(sizeof(void*) * INITIAL_STACK_SIZE); 22 | stack->type=(char*) malloc(sizeof(char) * INITIAL_STACK_SIZE); 23 | } 24 | 25 | int getStackSize(struct stack_t* stack) { 26 | return stack->size; 27 | } 28 | 29 | int pop(struct stack_t* stack) { 30 | if (stack->size > 0) { 31 | int data=*((int*) stack->data[stack->size-1]); 32 | free(stack->data[--stack->size]); 33 | return data; 34 | } 35 | return -1; 36 | } 37 | 38 | void clearStack(struct stack_t* stack) { 39 | stack->size=0; 40 | free(stack->data); 41 | free(stack->type); 42 | stack->width=INITIAL_STACK_SIZE; 43 | } 44 | 45 | char* popIdentifier(struct stack_t* stack) { 46 | if (stack->size > 0) { 47 | return (char*) stack->data[--stack->size]; 48 | } 49 | return NULL; 50 | } 51 | 52 | struct memorycontainer* popExpression(struct stack_t* stack) { 53 | if (stack->size > 0) { 54 | return (struct memorycontainer*) stack->data[--stack->size]; 55 | } 56 | return NULL; 57 | } 58 | 59 | struct identifier_exp* popExpressionIdentifier(struct stack_t* stack) { 60 | if (stack->size > 0) { 61 | return (struct identifier_exp*) stack->data[--stack->size]; 62 | } 63 | return NULL; 64 | } 65 | 66 | struct identifier_exp* getExpressionIdentifierAt(struct stack_t* stack, int index) { 67 | if (stack->size > index) { 68 | return (struct identifier_exp*) stack->data[index]; 69 | } 70 | return NULL; 71 | 
} 72 | 73 | struct memorycontainer* getExpressionAt(struct stack_t* stack, int index) { 74 | if (stack->size > index) { 75 | return (struct memorycontainer*) stack->data[index]; 76 | } 77 | return NULL; 78 | } 79 | 80 | char* getIdentifierAt(struct stack_t* stack, int index) { 81 | if (stack->size > index) { 82 | return (char*) stack->data[index]; 83 | } 84 | return NULL; 85 | } 86 | 87 | int getTopType(struct stack_t* stack) { 88 | if (stack->size > 0) { 89 | return stack->type[stack->size]; 90 | } 91 | return 0; 92 | } 93 | 94 | int getTypeAt(struct stack_t* stack, int index) { 95 | if (stack->size > index) { 96 | return stack->type[index]; 97 | } 98 | return 0; 99 | } 100 | 101 | void push(struct stack_t* stack, int val) { 102 | stack->size++; 103 | if (stack->size >= stack->width) { 104 | stack->width*=2; 105 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 106 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 107 | } 108 | stack->data[stack->size-1]=malloc(sizeof(int)); 109 | stack->type[stack->size-1]=1; 110 | memcpy(stack->data[stack->size-1], &val, sizeof(int)); 111 | } 112 | 113 | void pushIdentifier(struct stack_t* stack, char* val) { 114 | stack->size++; 115 | if (stack->size >= stack->width) { 116 | stack->width*=2; 117 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 118 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 119 | } 120 | stack->data[stack->size-1]=malloc(strlen(val)+1); 121 | stack->type[stack->size-1]=2; 122 | strcpy(stack->data[stack->size-1], val); 123 | } 124 | 125 | void pushIdentifierAssgnExpression(struct stack_t* stack, char* val, struct memorycontainer* exp) { 126 | stack->size++; 127 | if (stack->size >= stack->width) { 128 | stack->width*=2; 129 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 130 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 131 | } 132 | struct 
identifier_exp atom; 133 | atom.identifier=(char*) malloc(strlen(val)+1); 134 | strcpy(atom.identifier, val); 135 | atom.exp=exp; 136 | stack->data[stack->size-1]=malloc(sizeof(struct identifier_exp)); 137 | memcpy(stack->data[stack->size-1], &atom, sizeof(struct identifier_exp)); 138 | stack->type[stack->size-1]=4; 139 | } 140 | 141 | void pushExpression(struct stack_t* stack, struct memorycontainer* exp) { 142 | stack->size++; 143 | if (stack->size >= stack->width) { 144 | stack->width*=2; 145 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 146 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 147 | } 148 | stack->data[stack->size-1]=exp; 149 | stack->type[stack->size-1]=3; 150 | } 151 | 152 | int peek(struct stack_t* stack) { 153 | if (stack->size > 0) { 154 | return *((int*) stack->data[stack->size-1]); 155 | } 156 | return -1; 157 | } 158 | -------------------------------------------------------------------------------- /host/misc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include "misc.h" 31 | #include "basictokens.h" 32 | 33 | void errorCheck(int value, char * errorMessage) { 34 | if (value == -1) { 35 | fprintf(stderr, "Error: %s with %d\n", errorMessage, value); 36 | exit(EXIT_FAILURE); 37 | } 38 | } 39 | 40 | char* translateErrorCodeToMessage(unsigned char errorCode) { 41 | char * errorMessage=NULL; 42 | switch (errorCode) { 43 | case ERR_STR_ONLYTEST_EQ: 44 | errorMessage="Can only test for equality with strings"; 45 | break; 46 | case ERR_NONE_ONLYTEST_EQ: 47 | errorMessage="Can only test for equality with none"; 48 | break; 49 | case ERR_ONLY_ADDITION_STR: 50 | errorMessage="Can only perform addition with strings"; 51 | break; 52 | case ERR_TOO_MANY_ARR_INDEX: 53 | errorMessage="Too many array indexes in expression"; 54 | break; 55 | case ERR_NEG_ARR_INDEX: 56 | errorMessage="Not allowed negative array indexes"; 57 | break; 58 | case ERR_ARR_INDEX_EXCEED_SIZE: 59 | errorMessage="Array index in expression exceeds array size in that dimension"; 60 | break; 61 | case ERR_ONLY_DISPLAY_STR_WITH_INPUT: 62 | errorMessage="Can only display strings with input statement"; 63 | break; 64 | case ERR_OUT_OF_SHARED_HEAP_MEM: 65 | errorMessage="Out of shared heap memory for data"; 66 | break; 67 | case ERR_OUT_OF_CORE_SHARED_HEAP_MEM: 68 | errorMessage="Out of core and shared heap memory for data"; 69 | break; 70 | case 
ERR_OUT_OF_SHARED_STACK_MEM: 71 | errorMessage="Out of shared stack memory for data"; 72 | break; 73 | case ERR_OUT_OF_CORE_SHARED_STACK_MEM: 74 | errorMessage="Out of core and shared stack memory for data"; 75 | break; 76 | case ERR_ONLY_SEND_INT_AND_REAL: 77 | errorMessage="Can only send integers and reals between cores"; 78 | break; 79 | case ERR_SEND_TO_UNKNOWN_CORE: 80 | errorMessage="Attempting to send to non-existent or inactive process"; 81 | break; 82 | case ERR_SEND_TO_INACTIVE_CORE: 83 | errorMessage="Attempting to send to inactive core"; 84 | break; 85 | case ERR_RECV_FROM_UNKNOWN_CORE: 86 | errorMessage="Attempting to receive from non-existent or inactive process"; 87 | break; 88 | case ERR_RECV_FROM_INACTIVE_CORE: 89 | errorMessage="Attempting to receive from inactive core"; 90 | break; 91 | case ERR_SENDRECV_WITH_UNKNOWN_CORE: 92 | errorMessage="Attempting to sendrecv with non-existent or inactive process"; 93 | break; 94 | case ERR_FREE_ON_NON_HEAP: 95 | errorMessage="Attempting to free non allocated heap memory"; 96 | break; 97 | case ERR_INCORRECT_NUM_NATIVE_PARAMS: 98 | errorMessage="Incorrect number of parameters provided to native function call"; 99 | break; 100 | case ERR_UNKNOWN_NATIVE_COMMAND: 101 | errorMessage="Unknown native command supplied to runtime library"; 102 | break; 103 | case ERR_FNCALL_VAR_NOT_CONTAINING_FN_PTR: 104 | errorMessage="Function called via a variable but this variable is not pointing to any function"; 105 | break; 106 | case ERR_PROBE_NOT_SUPPORTED: 107 | errorMessage="Message probe and non-blocking send test and wait not supported for communications with virtual cores"; 108 | break; 109 | case ERR_NBSEND_NOT_SUPPORTED: 110 | errorMessage="Non-blocking sends between device and virtual cores on the host are not yet supported"; 111 | break; 112 | } 113 | if (errorMessage != NULL) { 114 | char * msgToRet=(char*) malloc(strlen(errorMessage) + 1); 115 | strcpy(msgToRet, errorMessage); 116 | return msgToRet; 117 | } else 
{ 118 | errorMessage="Unknown error code of"; 119 | char * msgToRet=(char*) malloc(strlen(errorMessage) + 10); 120 | sprintf(msgToRet, "%s 0x%x", errorMessage, errorCode); 121 | return msgToRet; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /host/parser.h: -------------------------------------------------------------------------------- 1 | /* A Bison parser, made by GNU Bison 3.0.4. */ 2 | 3 | /* Bison interface for Yacc-like parsers in C 4 | 5 | Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . */ 19 | 20 | /* As a special exception, you may create a larger work that contains 21 | part or all of the Bison parser skeleton and distribute that work 22 | under terms of your choice, so long as that work isn't itself a 23 | parser generator using the skeleton or a modified version thereof 24 | as a parser skeleton. Alternatively, if you modify or redistribute 25 | the parser skeleton itself, you may (at your option) remove this 26 | special exception, which will cause the skeleton and the resulting 27 | Bison output files to be licensed under the GNU General Public 28 | License without this special exception. 29 | 30 | This special exception was added by the Free Software Foundation in 31 | version 2.2 of Bison. 
*/ 32 | 33 | #ifndef YY_YY_PARSER_H_INCLUDED 34 | # define YY_YY_PARSER_H_INCLUDED 35 | /* Debug traces. */ 36 | #ifndef YYDEBUG 37 | # define YYDEBUG 0 38 | #endif 39 | #if YYDEBUG 40 | extern int yydebug; 41 | #endif 42 | 43 | /* Token type. */ 44 | #ifndef YYTOKENTYPE 45 | # define YYTOKENTYPE 46 | enum yytokentype 47 | { 48 | INTEGER = 258, 49 | REAL = 259, 50 | STRING = 260, 51 | IDENTIFIER = 261, 52 | NEWLINE = 262, 53 | INDENT = 263, 54 | OUTDENT = 264, 55 | DIM = 265, 56 | SDIM = 266, 57 | EXIT = 267, 58 | QUIT = 268, 59 | ELSE = 269, 60 | ELIF = 270, 61 | COMMA = 271, 62 | WHILE = 272, 63 | PASS = 273, 64 | AT = 274, 65 | FOR = 275, 66 | TO = 276, 67 | FROM = 277, 68 | NEXT = 278, 69 | GOTO = 279, 70 | PRINT = 280, 71 | INPUT = 281, 72 | IF = 282, 73 | NATIVE = 283, 74 | ADD = 284, 75 | SUB = 285, 76 | COLON = 286, 77 | DEF = 287, 78 | RET = 288, 79 | NONE = 289, 80 | FILESTART = 290, 81 | IN = 291, 82 | ADDADD = 292, 83 | SUBSUB = 293, 84 | MULMUL = 294, 85 | DIVDIV = 295, 86 | MODMOD = 296, 87 | POWPOW = 297, 88 | FLOORDIVFLOORDIV = 298, 89 | FLOORDIV = 299, 90 | MULT = 300, 91 | DIV = 301, 92 | MOD = 302, 93 | AND = 303, 94 | OR = 304, 95 | NEQ = 305, 96 | LEQ = 306, 97 | GEQ = 307, 98 | LT = 308, 99 | GT = 309, 100 | EQ = 310, 101 | IS = 311, 102 | NOT = 312, 103 | STR = 313, 104 | ID = 314, 105 | SYMBOL = 315, 106 | ALIAS = 316, 107 | LPAREN = 317, 108 | RPAREN = 318, 109 | SLBRACE = 319, 110 | SRBRACE = 320, 111 | TRUE = 321, 112 | FALSE = 322, 113 | ASSGN = 323, 114 | POW = 324 115 | }; 116 | #endif 117 | /* Tokens. 
*/ 118 | #define INTEGER 258 119 | #define REAL 259 120 | #define STRING 260 121 | #define IDENTIFIER 261 122 | #define NEWLINE 262 123 | #define INDENT 263 124 | #define OUTDENT 264 125 | #define DIM 265 126 | #define SDIM 266 127 | #define EXIT 267 128 | #define QUIT 268 129 | #define ELSE 269 130 | #define ELIF 270 131 | #define COMMA 271 132 | #define WHILE 272 133 | #define PASS 273 134 | #define AT 274 135 | #define FOR 275 136 | #define TO 276 137 | #define FROM 277 138 | #define NEXT 278 139 | #define GOTO 279 140 | #define PRINT 280 141 | #define INPUT 281 142 | #define IF 282 143 | #define NATIVE 283 144 | #define ADD 284 145 | #define SUB 285 146 | #define COLON 286 147 | #define DEF 287 148 | #define RET 288 149 | #define NONE 289 150 | #define FILESTART 290 151 | #define IN 291 152 | #define ADDADD 292 153 | #define SUBSUB 293 154 | #define MULMUL 294 155 | #define DIVDIV 295 156 | #define MODMOD 296 157 | #define POWPOW 297 158 | #define FLOORDIVFLOORDIV 298 159 | #define FLOORDIV 299 160 | #define MULT 300 161 | #define DIV 301 162 | #define MOD 302 163 | #define AND 303 164 | #define OR 304 165 | #define NEQ 305 166 | #define LEQ 306 167 | #define GEQ 307 168 | #define LT 308 169 | #define GT 309 170 | #define EQ 310 171 | #define IS 311 172 | #define NOT 312 173 | #define STR 313 174 | #define ID 314 175 | #define SYMBOL 315 176 | #define ALIAS 316 177 | #define LPAREN 317 178 | #define RPAREN 318 179 | #define SLBRACE 319 180 | #define SRBRACE 320 181 | #define TRUE 321 182 | #define FALSE 322 183 | #define ASSGN 323 184 | #define POW 324 185 | 186 | /* Value type. */ 187 | #if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED 188 | 189 | union YYSTYPE 190 | { 191 | #line 22 "epython.y" /* yacc.c:1909 */ 192 | 193 | int integer; 194 | unsigned char uchar; 195 | float real; 196 | struct memorycontainer * data; 197 | char *string; 198 | struct stack_t * stack; 199 | 200 | #line 201 "parser.h" /* yacc.c:1909 */ 201 | }; 202 | 203 | typedef union YYSTYPE YYSTYPE; 204 | # define YYSTYPE_IS_TRIVIAL 1 205 | # define YYSTYPE_IS_DECLARED 1 206 | #endif 207 | 208 | 209 | extern YYSTYPE yylval; 210 | 211 | int yyparse (void); 212 | 213 | #endif /* !YY_YY_PARSER_H_INCLUDED */ 214 | -------------------------------------------------------------------------------- /host/epython.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "parser.h" 3 | #include "stack.h" 4 | 5 | static const unsigned int TAB_WIDTH = 4; 6 | 7 | int line_num = 0, indent_caller; 8 | char * parsing_filename=NULL, *fn_decorator=NULL; 9 | int line_indent=0, fake_outdent_symbol=0; 10 | 11 | extern struct stack_t indent_stack, filenameStack, lineNumberStack; 12 | 13 | void yyget_INTEGER(YYSTYPE*, char*, size_t); 14 | void yyget_REAL (YYSTYPE*, char*, size_t); 15 | void yyget_STRING(YYSTYPE*, char*, size_t); 16 | 17 | #define yyget_IDENTIFIER yyget_STRING 18 | #define SAVE_VALUE(type) \ 19 | { \ 20 | yyget_##type(&yylval, yytext, yyleng); \ 21 | return type; \ 22 | } 23 | 24 | void yyget_INTEGER(YYSTYPE *outval, char *text, size_t len) { 25 | outval->integer = atoi(text); 26 | } 27 | 28 | void yyget_REAL(YYSTYPE *outval, char *text, size_t len) { 29 | outval->real = atof(text); 30 | } 31 | 32 | void yyget_STRING(YYSTYPE *outval, char *text, size_t len) { 33 | outval->string = text; 34 | } 35 | %} 36 | 37 | /* Python indentation handling based upon code at https://github.com/lucasb-eyer/flex-bison-indentation and 38 | http://www.benbarbour.com/implementing-python-style-indention-syntax-using-flex-bison-or-lexyacc */ 39 | 40 | O [0-7] 41 | D [0-9] 42 | NZ 
[1-9] 43 | L [a-zA-Z_] 44 | A [a-zA-Z_0-9] 45 | H [a-fA-F0-9] 46 | HP (0[xX]) 47 | E ([Ee][+-]?{D}+) 48 | P ([Pp][+-]?{D}+) 49 | FS (f|F|l|L) 50 | IS (((u|U)(l|L|ll|LL)?)|((l|L|ll|LL)(u|U)?)) 51 | CP (u|U|L) 52 | SP (u8|u|U|L) 53 | ES (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+)) 54 | WS [ \t\v\n\f] 55 | 56 | STRING_CHARS [[:print:]]{-}[\"] 57 | 58 | %option noyywrap case-insensitive 59 | %x COMMENTS 60 | %x SINGLELINECOMMENT 61 | %x INDENT_MODE 62 | %% 63 | 64 | \<\<\<.*\n { 65 | if (parsing_filename != NULL) { 66 | pushIdentifier(&filenameStack, parsing_filename); 67 | push(&lineNumberStack, line_num); 68 | free(parsing_filename); 69 | } 70 | parsing_filename=(char*) malloc(yyleng-3); 71 | strncpy(parsing_filename, &yytext[3], yyleng-4); 72 | parsing_filename[yyleng-4]='\0'; 73 | line_num=1; 74 | } 75 | \>\>\>.*\n { 76 | if (getStackSize(&filenameStack) > 0) { 77 | parsing_filename=popIdentifier(&filenameStack); 78 | line_num=pop(&lineNumberStack); 79 | } 80 | } 81 | 82 | \/\* {BEGIN(COMMENTS);} 83 | \*\/ {BEGIN(INITIAL);} 84 | \n { ++line_num; } 85 | \n { ++line_num;BEGIN(INITIAL); return NEWLINE; } 86 | . ; 87 | 88 | " " { line_indent++; } 89 | \t { line_indent+=TAB_WIDTH; } 90 | \n { line_indent=0; } 91 | <> { if (peek(&indent_stack) > 0) { 92 | pop(&indent_stack); 93 | if (line_indent < peek(&indent_stack)) { 94 | int i; 95 | unput('\n'); 96 | for (i=0;i. 
{ 106 | if (!fake_outdent_symbol) unput(*yytext); 107 | fake_outdent_symbol=0; 108 | if (line_indent > 0 && line_indent > peek(&indent_stack)) { 109 | push(&indent_stack, line_indent); 110 | BEGIN(indent_caller); 111 | return INDENT; 112 | } else if (line_indent < peek(&indent_stack)) { 113 | pop(&indent_stack); 114 | if (peek(&indent_stack) != -1 && line_indent != peek(&indent_stack)) { 115 | int i; 116 | for(i=0;i"|"!=" return NEQ; 151 | "<=" return LEQ; 152 | ">=" return GEQ; 153 | "<" return LT; 154 | ">" return GT; 155 | "=" return ASSGN; 156 | "==" return EQ; 157 | "^"|"**" return POW; 158 | "," return COMMA; 159 | ":" return COLON; 160 | "+" return ADD; 161 | "-" return SUB; 162 | "*" return MULT; 163 | "/" return DIV; 164 | "%" return MOD; 165 | "//" return FLOORDIV; 166 | "+=" return ADDADD; 167 | "-=" return SUBSUB; 168 | "*=" return MULMUL; 169 | "/=" return DIVDIV; 170 | "%=" return MODMOD; 171 | "**=" return POWPOW; 172 | "//=" return FLOORDIVFLOORDIV; 173 | "[" return SLBRACE; 174 | "]" return SRBRACE; 175 | "(" return LPAREN; 176 | ")" return RPAREN; 177 | "@" return AT; 178 | TRUE return TRUE; 179 | FALSE return FALSE; 180 | DEF return DEF; 181 | RETURN return RET; 182 | NONE return NONE; 183 | ELSE return ELSE; 184 | ELIF return ELIF; 185 | IN return IN; 186 | IS return IS; 187 | WHILE return WHILE; 188 | PASS return PASS; 189 | EXIT return EXIT; 190 | QUIT return QUIT; 191 | FOR return FOR; 192 | TO return TO; 193 | FROM return FROM; 194 | NEXT return NEXT; 195 | GOTO return GOTO; 196 | IF return IF; 197 | PRINT return PRINT; 198 | INPUT return INPUT; 199 | NATIVE return NATIVE; 200 | STR return STR; 201 | ID return ID; 202 | SYMBOL return SYMBOL; 203 | ALIAS return ALIAS; 204 | "#" BEGIN(SINGLELINECOMMENT); 205 | 206 | [:.;] return yytext[0]; 207 | 208 | [a-zA-Z_][a-zA-Z0-9_.]* SAVE_VALUE(IDENTIFIER); 209 | %% 210 | -------------------------------------------------------------------------------- /host/byteassembler.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef BYTEASSEMBLER_H_ 28 | #define BYTEASSEMBLER_H_ 29 | 30 | #include 31 | #include "stack.h" 32 | 33 | #define NATIVE_RTL_ISHOST_STR "rtl_ishost" 34 | #define NATIVE_RTL_ISDEVICE_STR "rtl_isdevice" 35 | #define NATIVE_RTL_PRINT_STR "rtl_print" 36 | #define NATIVE_RTL_NUMDIMS_STR "rtl_numdims" 37 | #define NATIVE_RTL_DSIZE_STR "rtl_dsize" 38 | #define NATIVE_RTL_INPUT_STR "rtl_input" 39 | #define NATIVE_RTL_INPUTPRINT_STR "rtl_inputprint" 40 | #define NATIVE_RTL_SYNC_STR "rtl_sync" 41 | #define NATIVE_RTL_GC_STR "rtl_gc" 42 | #define NATIVE_RTL_FREE_STR "rtl_free" 43 | #define NATIVE_RTL_SEND_STR "rtl_send" 44 | #define NATIVE_RTL_RECV_STR "rtl_recv" 45 | #define NATIVE_RTL_SENDRECV_STR "rtl_sendrecv" 46 | #define NATIVE_RTL_BCAST_STR "rtl_bcast" 47 | #define NATIVE_RTL_NUMCORES_STR "rtl_numcores" 48 | #define NATIVE_RTL_COREID_STR "rtl_coreid" 49 | #define NATIVE_RTL_REDUCE_STR "rtl_reduce" 50 | #define NATIVE_RTL_ALLOCATEARRAY_STR "rtl_allocatearray" 51 | #define NATIVE_RTL_ALLOCATESHAREDARRAY_STR "rtl_allocatesharedarray" 52 | #define NATIVE_RTL_MATH_STR "rtl_math" 53 | #define NATIVE_RTL_PROBE_FOR_MESSAGE_STR "rtl_probe" 54 | #define NATIVE_RTL_TEST_FOR_SEND_STR "rtl_test_for_send" 55 | #define NATIVE_RTL_WAIT_FOR_SEND_STR "rtl_wait_for_send" 56 | #define NATIVE_RTL_SEND_NB_STR "rtl_send_nonblocking" 57 | #define NATIVE_RTL_GLOBAL_REFRENCE_STR "rtl_global_reference" 58 | #define NATIVE_RTL_DEREFRENCE_STR "rtl_dereference" 59 | #define NATIVE_RTL_FLATTEN_STR "rtl_flatten" 60 | #define NATIVE_RTL_ARRAY_COPY_STR "rtl_arraycopy" 61 | 62 | extern int line_num; 63 | extern char * fn_decorator; 64 | 65 | // Used for tracking gotos and line numberings (which are resolved once the byte code is assembled) 66 | struct lineDefinition { 67 | char type; 68 | char * name; 69 | int linenumber, currentpoint; 70 | struct lineDefinition * next; 71 | }; 72 | 73 | // Tree node for the current function call and the main entry point 74 | struct 
function_call_tree_node { 75 | int number_of_calls; 76 | char* calledFunctions[256]; 77 | }; 78 | 79 | // A memory container, containing some bytecode, the length of the code and line definitions that relate to it 80 | struct memorycontainer { 81 | unsigned int length; 82 | char * data; 83 | struct lineDefinition * lineDefns; 84 | }; 85 | 86 | // A function definition, containing the function memory and the name of the function 87 | struct functionDefinition { 88 | char * name; 89 | struct memorycontainer * contents; 90 | int numberEntriesInSymbolTable, recursive, number_of_fn_calls, called; 91 | char ** functionCalls; 92 | }; 93 | 94 | void enterFunction(char*); 95 | unsigned short getNumberEntriesInSymbolTable(void); 96 | void setNumberEntriesInSymbolTable(unsigned short); 97 | void appendNewFunctionStatement(char*, struct stack_t*, struct memorycontainer*); 98 | void appendArgument(char*); 99 | struct memorycontainer* appendCallFunctionStatement(char*, struct stack_t*); 100 | struct memorycontainer* appendNativeCallFunctionStatement(char*, struct stack_t*, struct memorycontainer*); 101 | struct memorycontainer* appendReferenceStatement(char*); 102 | struct memorycontainer* appendSymbolStatement(char*); 103 | struct memorycontainer* appendAliasStatement(char*,struct memorycontainer*); 104 | struct memorycontainer* appendGotoStatement(int); 105 | struct memorycontainer* appendWhileStatement(struct memorycontainer*, struct memorycontainer*); 106 | struct memorycontainer* appendForStatement(char *, struct memorycontainer*, struct memorycontainer*); 107 | struct memorycontainer* appendIfStatement(struct memorycontainer*, struct memorycontainer*); 108 | struct memorycontainer* appendIfElseStatement(struct memorycontainer*, struct memorycontainer*, struct memorycontainer*); 109 | struct memorycontainer* appendArraySetStatement(char*, struct stack_t*, struct memorycontainer*); 110 | struct memorycontainer* appendLetStatement(struct memorycontainer*, struct 
memorycontainer*); 111 | struct memorycontainer* appendLetWithOperatorStatement(struct memorycontainer*, struct memorycontainer*, unsigned char); 112 | struct memorycontainer* appendReturnStatement(void); 113 | struct memorycontainer* appendReturnStatementWithExpression(struct memorycontainer*); 114 | struct memorycontainer* appendStopStatement(void); 115 | struct memorycontainer* appendPassStatement(void); 116 | struct memorycontainer* createStringExpression(char*); 117 | struct memorycontainer* createRealExpression(float); 118 | struct memorycontainer* createIntegerExpression(int); 119 | struct memorycontainer* createBooleanExpression(int); 120 | struct memorycontainer* createArrayExpression(struct stack_t*, struct memorycontainer*); 121 | struct memorycontainer* createNoneExpression(void); 122 | struct memorycontainer* createIdentifierExpression(char*,char); 123 | struct memorycontainer* createIdentifierArrayAccessExpression(char*, struct stack_t*); 124 | struct memorycontainer* createNumberExpression(float); 125 | struct memorycontainer* createNotExpression(struct memorycontainer*); 126 | struct memorycontainer* createOrExpression(struct memorycontainer*, struct memorycontainer*); 127 | struct memorycontainer* createAndExpression(struct memorycontainer*, struct memorycontainer*); 128 | struct memorycontainer* createEqExpression(struct memorycontainer*, struct memorycontainer*); 129 | struct memorycontainer* createIsExpression(struct memorycontainer*, struct memorycontainer*); 130 | struct memorycontainer* createNeqExpression(struct memorycontainer*, struct memorycontainer*); 131 | struct memorycontainer* createGtExpression(struct memorycontainer*, struct memorycontainer*); 132 | struct memorycontainer* createLtExpression(struct memorycontainer*, struct memorycontainer*); 133 | struct memorycontainer* createGeqExpression(struct memorycontainer*, struct memorycontainer*); 134 | struct memorycontainer* createLeqExpression(struct memorycontainer*, struct 
memorycontainer*); 135 | struct memorycontainer* createAddExpression(struct memorycontainer*, struct memorycontainer*); 136 | struct memorycontainer* createSubExpression(struct memorycontainer*, struct memorycontainer*); 137 | struct memorycontainer* createMulExpression(struct memorycontainer*, struct memorycontainer*); 138 | struct memorycontainer* createDivExpression(struct memorycontainer*, struct memorycontainer*); 139 | struct memorycontainer* createFloorDivExpression(struct memorycontainer*, struct memorycontainer*); 140 | struct memorycontainer* createModExpression(struct memorycontainer*, struct memorycontainer*); 141 | struct memorycontainer* createPowExpression(struct memorycontainer*, struct memorycontainer*); 142 | void addVariableIfNeeded(char*); 143 | void enterScope(void); 144 | void leaveScope(void); 145 | #endif /* BYTEASSEMBLER_H_ */ 146 | -------------------------------------------------------------------------------- /docs/tutorial5.md: -------------------------------------------------------------------------------- 1 | # Python task farms on the Epiphany 2 | Splitting problems up into tasks and running these concurrently over a number of cores is a popular approach to parallelism. In recent year this has becoming more and more popular and is seen as one of the ways in which parallel codes might be written for future machines with very large numbers of processing cores. In this tutorial we are going to look at this task approach and using ePython it is very simple to send tasks around between the Epiphany cores. 3 | 4 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you though installing ePython and running a simple "hello world" example on the Epiphany cores. 
If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 5 | 6 | ### Remote procedure calls 7 | Remote Procedure Calls (RPC) is where a core will call a function to execute on another core, providing the arguments to that function and then obtaining any results from its execution. In Python functions are known as *first class*, which means that they can be refered to like any other value and even communicated between the Epiphany cores. 8 | 9 | ```python 10 | import parallel 11 | 12 | if (coreid()==0): 13 | send(functionToRun, 1) 14 | send(50, 1) 15 | print recv(1) 16 | elif (coreid()==1): 17 | op=recv(0) 18 | arg=recv(0) 19 | returnVal=op(arg) 20 | send(returnVal, 0) 21 | 22 | def functionToRun(a): 23 | print "Running on core 1 "+a 24 | return a+10 25 | ``` 26 | 27 | In this code core 0 there is a function called *functionToRun*, which takes one argument and returns a value. Core 0 will send this function over to core 1, along with the argument and then await a message back from core 1 which it will then display. Core 1 receives the function (*op*), then receives the argument from core 0 to run this with (into *arg*), it then executes the function and sends back the returned value to core 0. 28 | 29 | In this approach we can communicate any functions, any number of arguments and send returned values back from any cores. However there is a problem with how we have written this, namely the fact that for core 0 this is blocking, i.e. it sits idle and waits for the returned value whilst the function is being executed on core 1. This isn't ideal and not particularly parallel because there might be other functions which core 0 wants to execute on other cores. 
30 | 31 | ### The taskfarm module 32 | We have seen so far that sending functions around is a nice way of executing them on other cores, but if these are possibly going to produce results and send them back we don't want to be stalling and waiting for the results. ePython comes with the *taskfarm* module which provides a way of farming tasks out to other cores and avoiding this issue of blocking for results. So now we are going to rewrite the first example but instead using functions from the *taskfarm* module: 33 | 34 | ```python 35 | import parallel 36 | import taskfarm 37 | 38 | initTaskFarm(0) 39 | 40 | if (coreid()==0): 41 | execFunction(1, functionToRun, 50) 42 | if (testFunctionFinish(1)): 43 | print "The function has executed" 44 | else: 45 | print "The function is still running" 46 | print waitFunctionFinish(1) 47 | shutdownTaskFarm() 48 | else: 49 | worker() 50 | 51 | def functionToRun(a): 52 | print "Running on core 1 "+a 53 | return a+10 54 | ``` 55 | 56 | The first function call *initTaskFarm* will initialise the task farm and the argument determines which core is the "master", i.e. which will instruct other cores to execute what functions. Every core which is not the master (in this case every core whose id is not 0) will call the *worker* function, which waits for either functions to execute (and then executes them) or for the task farm to shut down. Core 0 then calls *execFunction* which instructs the task farm to execute the *functionToRun* function on core 1 (the first argument) with the value of *50*. This *execFunction* call is non-blocking, this means that it will return as soon as the required data has been communicated to core 1 rather than when core 1 has finished executing the function itself. It is possible to determine the progress of a remotely running function on a core via the *testFunctionFinish* call and the *waitFunctionFinish* will wait for function completion on a core and return any returned values from that function. 
The *testFunctionFinish* function just returns true or false (representing whether the remote function has finished executing) so if you use this test, even if this call returns true, then you will still need to call *waitFunctionFinish* to retrieve a returned value. The *shutdownTaskFarm* is called from the master to command the worker cores to shutdown once they have finished running any current functions. 57 | 58 | As an exercise extend this example to run multiple functions over multiple worker cores (hint, if you get stuck then have a look at the [task_farm_example.py](../examples/task_farm_example.py) which illustrates how to do this.) 59 | 60 | ### Master worker 61 | 62 | In the previous section we used the terminology of "master" and "worker", this is a common approach in parallelism where one core (in this case that with the ID provided to the *initTaskFarm* function) is a master, dishing out work to all the other cores which are workers. You can see this in the diagram to the right, where the master sends out tasks and data to the workers which then execute these and send back any results and inform the master they have completed (and hence can accept another task.) Many parallel problems can be split up into this approach of master and worker, we have rewritten the estimation of PI via Monte Carlo example of ([tutorial 2](tutorial2.md)) to instead use tasks, the *taskfarm* module and this general parallelisation strategy of master-worker.
63 | 64 | ```python 65 | import parallel 66 | import taskfarm 67 | import util 68 | from math import pow 69 | from random import random 70 | 71 | initTaskFarm(0) 72 | 73 | if (coreid()==0): 74 | piVal=0.0 75 | for i in range(1,numcores()-1): 76 | execFunction(i, simulateDarts, 1000) 77 | for i in range(1,numcores()-1): 78 | piVal+=waitFunctionFinish(i) 79 | 80 | print piVal/(numcores()-1) 81 | shutdownTaskFarm() 82 | else: 83 | worker() 84 | 85 | def simulateDarts(num_darts): 86 | score=0.0 87 | j=1 88 | while j<=num_darts: 89 | x=random() 90 | y=random() 91 | 92 | if (pow(x,2) + pow(y,2) < 1.0): score+=1 93 | j+=1 94 | return 4.0 * (score/num_darts) 95 | ``` 96 | 97 | In the code core 0 is the master, this then remotely executes the *simulateDarts* function on every other core concurrently (as remember *execFunction* is non-blocking), and then when each function is executing it will block for each remote function to complete in turn (via the *waitFunctionFinish* function) and add the returned value to the running total of PI which is then divided to deduce the final value. 98 | 99 | For reasons of simplicity for the example we are just executing the *simulateDarts* function once on each worker core, if you look at the PI example of ([tutorial 2](tutorial2.md)) in more detail you will see that this works in rounds. As an exercise extend the example to include these rounds so there are multiple function calls on each core. Once you have got a simple version of this working then instead of waiting for every function in each round to complete before moving onto the next round, consider how you might use the *testFunctionFinish* to poll all workers and simply re-assign more work (i.e. calls to the *simulateDarts* function) as they become idle. 100 | 101 | ### Summary 102 | In this tutorial we have looked at the concepts of tasks, remote procedure calls, task farms and master worker.
It is often possible to rewrite many existing parallel codes in terms of distinct tasks which can be executed concurrently and this can form an alternative approach to parallelism. Whilst, for simplicity, we have focused on running a single task many times over all the cores (homogeneous), it is easy to see how one can provide additional functions and run tasks heterogeneously, i.e. very different tasks on different Epiphany cores. This can work well for some work loads and the tasks themselves can become quite complex and irregular, for instance involving communications. 103 | -------------------------------------------------------------------------------- /docs/tutorial6.md: -------------------------------------------------------------------------------- 1 | # Epiphany as an accelerator: offloading Python kernels 2 | 3 | The latest version of ePython makes it possible to take existing Python code and offload specific functions (we tend to call them kernels) to the Epiphany cores. This is really viewing the Epiphany as an accelerator, where codes run on the host (the Parallella) and specific computationally intensive kernels are then offloaded to the accelerator for execution. The good news is that, using ePython, it is super easy to do this! 4 | 5 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 6 | 7 | **Important:** Unlike some other ePython tutorials, all the code snippets here are to be executed under the CPython interpreter (using the *python* command.)
8 | 9 | ### Offloading a Python function 10 | 11 | To offload a Python function onto the Epiphany we first need to import the *epython* module, in the code example below we do this at line one. Next we simply need to decorate each function to be offloaded with the *@offload* decorator. If you run this in any Python interpreter on the Parallella (via the *python* command) then you will see each Epiphany core displays the *Hello World* message and a list of size 16 is displayed, each element with the value of 30. 12 | 13 | ```python 14 | from epython import offload 15 | 16 | @offload 17 | def helloworld(a,b): 18 | print "Hello World" 19 | return a+b 20 | 21 | print helloworld(10, 20) 22 | ``` 23 | 24 | If you comment out line three (the *offload* decorator) and re-run then a single *Hello World* and 30 value is displayed. Without the decorator then the *helloworld* function runs on the Parallella only (what we call the host.) When we offload a function to the Epiphany behind the scenes it will copy your code and function arguments to the Epiphany cores. Once the function has completed then each core will send its return value back (if there is one) to the Parallella host. In this example the return value is 30 (10 plus 20) and the function call provides 16 of these values - one from each Epiphany core. 25 | 26 | ### Non-blocking asynchronous kernel launches 27 | 28 | The previous example was blocking, where execution in Python on the host will stop and wait for the kernel to run to completion on the Epiphany before continuing. We don't always want this, instead it can sometimes be nice to launch kernels on the Epiphany, then whilst these run go and do something else before grabbing the results sometime later. 
29 | 30 | ```python 31 | from epython import offload 32 | 33 | @offload(async=True) 34 | def helloworld(a,b): 35 | print "Hello World" 36 | return a+b 37 | 38 | handler=helloworld(10, 20) 39 | print handler.wait() 40 | ``` 41 | 42 | In the code example above we have added the argument *async=True* to the *offload* decorator, which tells ePython to launch this function in an asynchronous, non-blocking, manner. Instead of returning the values directly from the function call (at line 8) a handler is returned which can be used to track function execution. At line 9 we are telling Python to wait upon handler completion, which will return the actual returned values from each kernel on the Epiphanies. It is also possible to use the *wait_any* call to wait for any return value (and potentially use this whilst other cores complete) as well as the *test* call which will return whether at least one kernel has completed and made its return value available. 43 | 44 | What if you launch multiple kernels without waiting for previous ones to complete? That's absolutely fine as ePython contains a scheduler which will queue up kernel launches until the Epiphany cores are free to execute them. 45 | 46 | ### Running on a subset of the Epiphany cores 47 | 48 | Up until this point we have executed our kernel on all the Epiphany cores, but often you want to limit to a subset of the cores instead. Using arguments to the *offload* directive we can instruct ePython how many and/or what cores to run on. 49 | 50 | ```python 51 | from epython import offload 52 | 53 | @offload(auto=4) 54 | def helloworld(a,b): 55 | print "Hello World" 56 | return a+b 57 | 58 | print helloworld(10, 20) 59 | ``` 60 | 61 | In this example we have added the *auto* argument to the *offload* directive, this tells ePython to run over 4 cores - but you don't care which cores these are so to best select exactly which cores to run over (i.e. idle cores.)
Instead of *auto* you can use *target*, for instance *target=[1,5,8]* which will explicitly run the kernel only on cores 1, 5 and 8. 62 | 63 | ```python 64 | from epython import offload 65 | 66 | @offload 67 | def helloworld(a,b): 68 | print "Hello World" 69 | return a+b 70 | 71 | h=helloworld(10, 20, target=[9, 10], async=True) 72 | print h.wait() 73 | ``` 74 | 75 | In the example above we have done things slightly differently - this *helloworld* function will execute asynchronously and on Epiphany cores 9 and 10 only. But we have instructed ePython to do this by arguments to the function call rather than arguments to the *offload* decorator. This provides additional flexibility, you can think of arguments to the specific function call as overriding the options provided to the decorator. For instance here by default *helloworld* will run on all cores in a blocking manner due to the arguments (or lack thereof) to the *offload* decorator. However we have overridden the behaviour just for this one specific kernel launch to execute asynchronously only on Epiphany cores 9 and 10. 76 | 77 | ### Short cuts for offload arguments 78 | 79 | Remembering the offload arguments for common calls can be a bit of a pain - hence we have also introduced the *offload_multiple* and *offload_single* decorators. These can be thought of exactly the same as the *offload* directive, but set up some pre-defined behaviour. The *offload_multiple* decorator will launch kernels in an asynchronous, non-blocking manner, on a subset of cores (the number given by the *cores* argument.) The *offload_single* decorator will launch the kernel in an asynchronous, non-blocking, manner on any single Epiphany core. 
80 | 81 | ```python 82 | from epython import offload_multiple, offload_single, waitAll 83 | 84 | @offload_multiple(cores=8) 85 | def adder(a,b): 86 | return a+b 87 | 88 | @offload_single 89 | def subtractor(a,b): 90 | return a-b 91 | 92 | 93 | h1=adder(10,20) 94 | h2=subtractor(10,20) 95 | print waitAll(h1,h2) 96 | ``` 97 | 98 | In this code snippet we have two functions, an *adder* function that will run over 8 Epiphany cores and a *substractor* function that will run only on one Epiphany core. These are both launched and the *waitAll* ePython call is issued to wait for all provided handlers to complete which also returns the kernel values from the Epiphany. 99 | 100 | ### Putting it all together to find PI 101 | 102 | Back in [tutorial 2](tutorial2.md) we ran a code directly on the Epiphany cores through ePython to find the value of PI using the dartboard method. We can modify this code to instead be executed from CPython, with the computational kernel offloaded to the Epiphany cores. 103 | 104 | ```python 105 | from epython import offload 106 | 107 | @offload 108 | def findPI(darts, rounds): 109 | from random import random 110 | from math import pow 111 | mypi=0.0 112 | i=1 113 | while i<=rounds: 114 | score=0.0 115 | j=1 116 | while j<=darts: 117 | x=random() 118 | y=random() 119 | if (pow(x,2) + pow(y,2) < 1.0): 120 | score+=1 121 | j+=1 122 | mypi=mypi+4.0 * (score/darts) 123 | i+=1 124 | return mypi 125 | 126 | pi=sum(findPI(100,10)) 127 | print "Value of PI is "+str((pi/10)/16) 128 | ``` 129 | 130 | In this code the *findPI* function will run on each Epiphany core - you can see that we are also importing specific module functions in this kernel too to provide us with *random* from the *random* module and the *pow* function from the *math* module (lines 5 and 6.) As an exercise, if you comment out the offload directive (line 3) and replace the last two lines with *print findPI(100,10)/10* then this will run on the host (the Parallella) in CPython only. 
131 | 132 | ### Summary 133 | 134 | In this tutorial we have looked at offloading specific functions (we often call then kernels) in an existing Python code onto the Epiphany. This is really useful, not least because ePython only supports a subset of the Python language - so being able to offload the computational kernels whilst keeping everything else unchanged on the host can make things far easier. 135 | 136 | However this is not quite the full story! What kills performance is copying data to and from an accelerator (i.e. arguments to and return values from the Epiphany kernels.) In the [next tutorial](tutorial7.md) we look at other, data focused, calls to allow us to declare accelerator resident data which kernels can then use without having to copy that data to and from the Epiphany continually. 137 | -------------------------------------------------------------------------------- /host/epython.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "byteassembler.h" 3 | #include "memorymanager.h" 4 | #include "stack.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | extern int line_num; 11 | extern char * parsing_filename; 12 | extern char * fn_decorator; 13 | void yyerror(char const*); 14 | int yylex(void); 15 | 16 | void yyerror (char const *msg) { 17 | fprintf(stderr, "%s at line %d of file %s\n", msg, line_num, parsing_filename); 18 | exit(0); 19 | } 20 | %} 21 | 22 | %union { 23 | int integer; 24 | unsigned char uchar; 25 | float real; 26 | struct memorycontainer * data; 27 | char *string; 28 | struct stack_t * stack; 29 | } 30 | 31 | %token INTEGER 32 | %token REAL 33 | %token STRING IDENTIFIER 34 | 35 | %token NEWLINE INDENT OUTDENT 36 | %token DIM SDIM EXIT QUIT ELSE ELIF COMMA WHILE PASS AT 37 | %token FOR TO FROM NEXT GOTO PRINT INPUT 38 | %token IF NATIVE 39 | 40 | %token ADD SUB COLON DEF RET NONE FILESTART IN ADDADD SUBSUB MULMUL DIVDIV MODMOD POWPOW FLOORDIVFLOORDIV FLOORDIV 41 | 
%token MULT DIV MOD AND OR NEQ LEQ GEQ LT GT EQ IS NOT STR ID SYMBOL ALIAS 42 | %token LPAREN RPAREN SLBRACE SRBRACE TRUE FALSE 43 | 44 | %left ADD SUB ADDADD SUBSUB 45 | %left MULT DIV MOD MULMUL DIVDIV MODMOD 46 | %left AND OR 47 | %left NEQ LEQ GEQ LT GT EQ IS ASSGN 48 | %right NOT 49 | %right POW POWPOW FLOORDIVFLOORDIV FLOORDIV 50 | 51 | %type ident declareident fn_entry 52 | %type unary_operator 53 | %type opassgn 54 | %type constant expression logical_or_expression logical_and_expression equality_expression relational_expression additive_expression multiplicative_expression value statement statements line lines codeblock elifblock identscalararray identscalararraylhs 55 | %type fndeclarationargs fncallargs commaseparray arrayaccessor 56 | 57 | %start program 58 | 59 | %% 60 | 61 | program : lines { compileMemory($1); } 62 | 63 | lines 64 | : line 65 | | lines line { $$=concatenateMemory($1, $2); } 66 | ; 67 | 68 | line 69 | : statements NEWLINE { $$ = $1; } 70 | | statements { $$ = $1; } 71 | | NEWLINE { $$ = NULL; } 72 | ; 73 | 74 | statements 75 | : statement statements { $$=concatenateMemory($1, $2); } 76 | | statement 77 | ; 78 | 79 | statement 80 | : FOR declareident IN expression COLON codeblock { $$=appendForStatement($2, $4, $6); leaveScope(); } 81 | | WHILE expression COLON codeblock { $$=appendWhileStatement($2, $4); } 82 | | IF expression COLON codeblock { $$=appendIfStatement($2, $4); } 83 | | IF expression COLON codeblock ELSE COLON codeblock { $$=appendIfElseStatement($2, $4, $7); } 84 | | IF expression COLON codeblock elifblock { $$=appendIfElseStatement($2, $4, $5); } 85 | | IF expression COLON statements { $$=appendIfStatement($2, $4); } 86 | | ELIF expression COLON codeblock { $$=appendIfStatement($2, $4); } 87 | | identscalararraylhs ASSGN expression { $$=appendLetStatement($1, $3); } 88 | | identscalararray opassgn expression { $$=appendLetWithOperatorStatement($1, $3, $2); } 89 | | PRINT expression { 
$$=appendNativeCallFunctionStatement("rtl_print", NULL, $2); } 90 | | EXIT LPAREN RPAREN{ $$=appendStopStatement(); } 91 | | QUIT LPAREN RPAREN{ $$=appendStopStatement(); } 92 | | fn_entry LPAREN fndeclarationargs RPAREN COLON codeblock { appendNewFunctionStatement($1, $3, $6); leaveScope(); $$ = NULL; } 93 | | RET { $$ = appendReturnStatement(); } 94 | | RET expression { $$ = appendReturnStatementWithExpression($2); } 95 | | ident LPAREN fncallargs RPAREN { $$=appendCallFunctionStatement($1, $3); } 96 | | NATIVE ident LPAREN fncallargs RPAREN { $$=appendNativeCallFunctionStatement($2, $4, NULL); } 97 | | PASS { $$=appendPassStatement(); } 98 | | AT ident { fn_decorator=(char*) malloc(strlen($2)+1); strcpy(fn_decorator, $2); $$ = NULL; } 99 | | ALIAS LPAREN ident COMMA expression RPAREN { $$=appendAliasStatement($3, $5); } 100 | ; 101 | 102 | arrayaccessor 103 | : SLBRACE expression SRBRACE { $$=getNewStack(); pushExpression($$, $2); } 104 | | arrayaccessor SLBRACE expression SRBRACE { pushExpression($1, $3); } 105 | ; 106 | 107 | fncallargs 108 | : /*blank*/ { $$=getNewStack(); } 109 | | expression { $$=getNewStack(); pushExpression($$, $1); } 110 | | fncallargs COMMA expression { pushExpression($1, $3); $$=$1; } 111 | ; 112 | 113 | fndeclarationargs 114 | : /*blank*/ { enterScope(); $$=getNewStack(); } 115 | | ident { $$=getNewStack(); enterScope(); pushIdentifier($$, $1); appendArgument($1); } 116 | | ident ASSGN expression { $$=getNewStack(); enterScope(); pushIdentifierAssgnExpression($$, $1, $3); appendArgument($1); } 117 | | fndeclarationargs COMMA ident { pushIdentifier($1, $3); $$=$1; appendArgument($3); } 118 | | fndeclarationargs COMMA ident ASSGN expression { pushIdentifierAssgnExpression($1, $3, $5); $$=$1; appendArgument($3); } 119 | ; 120 | 121 | fn_entry 122 | : DEF ident { enterFunction($2); $$=$2; } 123 | ; 124 | 125 | codeblock 126 | : NEWLINE indent_rule lines outdent_rule { $$=$3; } 127 | 128 | indent_rule 129 | : INDENT { enterScope(); } 130 | 
131 | outdent_rule 132 | : OUTDENT { leaveScope(); } 133 | 134 | opassgn 135 | : ADDADD { $$=0; } 136 | | SUBSUB { $$=1; } 137 | | MULMUL { $$=2; } 138 | | DIVDIV { $$=3; } 139 | | MODMOD { $$=4; } 140 | | POWPOW { $$=5; } 141 | | FLOORDIVFLOORDIV { $$=6; } 142 | 143 | declareident 144 | : ident { $$=$1; enterScope(); addVariableIfNeeded($1); } 145 | ; 146 | 147 | elifblock 148 | : ELIF expression COLON codeblock { $$=appendIfStatement($2, $4); } 149 | | ELIF expression COLON codeblock ELSE COLON codeblock { $$=appendIfElseStatement($2, $4, $7); } 150 | | ELIF expression COLON codeblock elifblock { $$=appendIfElseStatement($2, $4, $5); } 151 | ; 152 | 153 | expression 154 | : logical_or_expression { $$=$1; } 155 | | NOT logical_or_expression { $$=createNotExpression($2); } 156 | ; 157 | 158 | logical_or_expression 159 | : logical_and_expression { $$=$1; } 160 | | logical_or_expression OR logical_and_expression { $$=createOrExpression($1, $3); } 161 | 162 | logical_and_expression 163 | : equality_expression { $$=$1; } 164 | | logical_and_expression AND equality_expression { $$=createAndExpression($1, $3); } 165 | ; 166 | 167 | equality_expression 168 | : relational_expression { $$=$1; } 169 | | equality_expression EQ relational_expression { $$=createEqExpression($1, $3); } 170 | | equality_expression NEQ relational_expression { $$=createNeqExpression($1, $3); } 171 | | equality_expression IS relational_expression { $$=createIsExpression($1, $3); } 172 | ; 173 | 174 | relational_expression 175 | : additive_expression { $$=$1; } 176 | | relational_expression GT additive_expression { $$=createGtExpression($1, $3); } 177 | | relational_expression LT additive_expression { $$=createLtExpression($1, $3); } 178 | | relational_expression LEQ additive_expression { $$=createLeqExpression($1, $3); } 179 | | relational_expression GEQ additive_expression { $$=createGeqExpression($1, $3); } 180 | ; 181 | 182 | additive_expression 183 | : multiplicative_expression { $$=$1; } 184 | 
| additive_expression ADD multiplicative_expression { $$=createAddExpression($1, $3); } 185 | | additive_expression SUB multiplicative_expression { $$=createSubExpression($1, $3); } 186 | ; 187 | 188 | multiplicative_expression 189 | : value { $$=$1; } 190 | | multiplicative_expression MULT value { $$=createMulExpression($1, $3); } 191 | | multiplicative_expression DIV value { $$=createDivExpression($1, $3); } 192 | | multiplicative_expression FLOORDIV value { $$=createFloorDivExpression($1, $3); } 193 | | multiplicative_expression MOD value { $$=createModExpression($1, $3); } 194 | | multiplicative_expression POW value { $$=createPowExpression($1, $3); } 195 | | STR LPAREN expression RPAREN { $$=$3; } 196 | | SLBRACE commaseparray SRBRACE { $$=createArrayExpression($2, NULL); } 197 | | SLBRACE commaseparray SRBRACE MULT value { $$=createArrayExpression($2, $5); } 198 | | INPUT LPAREN RPAREN { $$=appendNativeCallFunctionStatement("rtl_input", NULL, NULL); } 199 | | INPUT LPAREN expression RPAREN { $$=appendNativeCallFunctionStatement("rtl_inputprint", NULL, $3); } 200 | ; 201 | 202 | commaseparray 203 | : expression { $$=getNewStack(); pushExpression($$, $1); } 204 | | commaseparray COMMA expression { pushExpression($1, $3); } 205 | ; 206 | 207 | value 208 | : constant { $$=$1; } 209 | | LPAREN expression RPAREN { $$=$2; } 210 | | identscalararray { $$=$1; } 211 | | ident LPAREN fncallargs RPAREN { $$=appendCallFunctionStatement($1, $3); } 212 | | NATIVE ident LPAREN fncallargs RPAREN { $$=appendNativeCallFunctionStatement($2, $4, NULL); } 213 | | ID LPAREN ident RPAREN { $$=appendReferenceStatement($3); } 214 | | SYMBOL LPAREN ident RPAREN { $$=appendSymbolStatement($3); } 215 | ; 216 | 217 | identscalararray 218 | : ident { $$=createIdentifierExpression($1, 0); } 219 | | ident arrayaccessor { $$=createIdentifierArrayAccessExpression($1, $2); } 220 | ; 221 | 222 | identscalararraylhs 223 | : ident { $$=createIdentifierExpression($1, 1); } 224 | | ident 
arrayaccessor { $$=createIdentifierArrayAccessExpression($1, $2); } 225 | 226 | ident 227 | : IDENTIFIER { $$ = malloc(strlen($1)+1); strcpy($$, $1); } 228 | ; 229 | 230 | constant 231 | : INTEGER { $$=createIntegerExpression($1); } 232 | | REAL { $$=createRealExpression($1); } 233 | | unary_operator INTEGER { $$=createIntegerExpression($1 * $2); } 234 | | unary_operator REAL { $$=createRealExpression($1 * $2); } 235 | | STRING { $$=createStringExpression($1); } 236 | | TRUE { $$=createBooleanExpression(1); } 237 | | FALSE { $$=createBooleanExpression(0); } 238 | | NONE { $$=createNoneExpression(); } 239 | ; 240 | 241 | unary_operator 242 | : ADD { $$ = 1; } 243 | | SUB { $$ = -1; } 244 | ; 245 | 246 | %% 247 | -------------------------------------------------------------------------------- /docs/tutorial7.md: -------------------------------------------------------------------------------- 1 | # Epiphany as an accelerator: managing device data 2 | 3 | In [tutorial 6](tutorial6.md) we looked at using the *offload* decorator on functions (or kernels) to execute them on Epiphany cores. So far we have assumed that data is copied in (via function arguments) for each kernel execution and then copied back to the host (via the return value) once code has finished executing on the Epiphany. Transfering data from host to device and back again is actually really expensive - in the world of GPUs you need to be really careful that the cost of data transfer does not outweigh the computational benefits of the accelertor. 4 | 5 | In addition to offloading functions, it is also possible to define and manage what we call device resident data - i.e. variables that are allocated on each Epiphany core and stay in memory between kernels runs. 
6 | 7 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 8 | 9 | **Important:** Unlike some other ePython tutorials, all the code snippets here are to be executed under the CPython interpreter (using the *python* command.) 10 | 11 | ### Defining device resident data 12 | 13 | The *epython* module contains a function, *define_on_device* which will define any variable with its current state on each Epiphany core. For instance in the code snippet below the variable *a* has been declared on the host to be an array of size 10. This is then declared on each Epiphany core and each core has its own, private, copy of this variable. These copies are entirely independent and for instance there is nothing stopping the programmer changing the structure or values of the arrays on some of the cores or the host. Once you have defined some data on the Epiphanies these variables are entirely independent of each other, for instance changes to variable *a* on Epiphany core 0 will have no impact on any other cores or the host copy of the data. 14 | 15 | ```python 16 | from epython import offload, define_on_device, copy_from_device 17 | 18 | a=[0]*10 19 | 20 | define_on_device(a) 21 | 22 | @offload 23 | def updateA(i): 24 | from parallel import coreid 25 | a[i]=i * coreid() 26 | 27 | for i in range(10): 28 | updateA(i) 29 | 30 | print copy_from_device("a") 31 | ``` 32 | 33 | In this code snippet the host is launching the *updateA* kernel on each Epiphany core 10 times.
For each kernel launch the array index is passed in and the Epiphany core will set that location in its copy of variable *a* to be *i* multiplied by the ID of the core. Most importantly you can see that variable *a* stays resident on the Epiphany cores between calls of the kernel, so we don't need to pay the penalty of copying this variable to and from the cores on every kernel launch. 34 | 35 | At the end of the code the *copy_from_device* is issued on the host to copy the device resident data held in variable *a* on every core back to the host. The host then displays this - it will display 16 arrays each of size 10 elements as there are 16 copies (one per core) of this data. 36 | 37 | You can see with the *copy_from_device* function, we are referring to the variable *a* by a string of its name (i.e. *"a"* rather than *a*.) 38 | 39 | ### Updating data on the device from the host 40 | 41 | The code snippet in this section below illustrates the *copy_to_device* function, where again we define the variable *a* on all Epiphany cores as well as the host. This is first updated on each core to be the value *19* by the call to the *updateA* kernel. Then we copy the value 99 into *a* held on each Epiphany core and overwrite the previous value *19* with this. 42 | 43 | ```python 44 | from epython import offload, define_on_device, copy_to_device, copy_from_device 45 | 46 | a=23 47 | 48 | define_on_device(a) 49 | 50 | @offload 51 | def updateA(): 52 | a=19 53 | 54 | print copy_from_device("a") 55 | updateA() 56 | print copy_from_device("a") 57 | copy_to_device("a", 99) 58 | print copy_from_device("a") 59 | ``` 60 | 61 | We have illustrated this with single valued variables (scalars), but these functions can equally be used for arrays too. Copying data from the host to the device can be useful as a code progresses.
This might be updated values from the host after it has done some more processing, or alternatively it might be a general *scratch* space you reuse between kernel calls and the host is copying in some input data for a series of kernel calls it is about to launch on the cores. 62 | 63 | ### Data transfer onto a subset of cores 64 | 65 | Exactly as with kernels, we can use additional arguments to these data transfer functions to run them in an asynchronous, non-blocking, manner and/or perform data transfer on a subset of the cores. The code snippet below is very similar to the previous one, but on line 10 we are only copying values of *a* held on cores 7 and 8 back - so the list that is displayed is of size 2 rather than 16. At line 13 we are only changing the value of *a* to be 99 on cores 1, 5 and 9, all other cores retain the existing value of *19*. 66 | 67 | ```python 68 | from epython import offload, define_on_device, copy_to_device, copy_from_device 69 | 70 | a=23 71 | 72 | define_on_device(a) 73 | 74 | @offload 75 | def updateA(): 76 | a=19 77 | 78 | print copy_from_device("a", target=[7,8]) 79 | updateA() 80 | print copy_from_device("a") 81 | copy_to_device("a", 99, target=[1,5,9]) 82 | print copy_from_device("a") 83 | ``` 84 | 85 | ### Asynchronous data transfer 86 | 87 | So far the data transfer calls we have looked at have been used in a blocking manner. What I mean by this is that the host will stop and block until the data copy (either to or from) the Epiphany core(s) has completed before continuing. This can be really expensive and the host can be wasting time waiting for data transfers to complete rather than getting on with useful work. Of course sometimes you definitely want this blocking behaviour, but equally other times it can be useful to kick off a data transfer and then go and do something else whilst this is in progress.
 88 | 89 | ```python 90 | from epython import offload, define_on_device, copy_to_device, copy_from_device 91 | from random import random 92 | 93 | a=[0]*100 94 | 95 | define_on_device(a) 96 | 97 | for i in range(100): 98 | a[i]=random() 99 | 100 | h=copy_to_device("a", a, async=True, target=range(10)) 101 | 102 | @offload 103 | def add(d1,d2): 104 | return d1+d2 105 | 106 | somevalue=add(1,5, target=[11]) 107 | h.wait() 108 | 109 | h=copy_from_device("a", target=[1], async=True) 110 | 111 | print h.test() 112 | print h.wait() 113 | ``` 114 | 115 | In the code snippet here we define an array of size 100 on each Epiphany core and then fill this up with random numbers on the host. The *copy_to_device* function is issued to update this on cores 0 to 9, but this is done asynchronously so that the call completes immediately without waiting for data to have been physically copied. At this point you MUST NOT update the host variable *a* (until the call has completed) as it is actively copying data in the background. Whilst the copying is ongoing, the *add* function is launched on core 11 and then we use *h.wait()* to wait on the handle *h* which is tracking the asynchronous data copying (at this point you can then change *a* on the device.) 116 | 117 | Similarly at line 20 we asynchronously copy the data held in *a* from Epiphany core 0, completion is tested for at line 22 and waited on (and then displayed) at line 23. You can see that this feels very similar to the way we launch kernels asynchronously and on a subset of the Epiphany cores - this is no accident and actually behind the scenes it uses the exact same mechanism. 118 | 119 | ### Chaining data transfer and kernel launches 120 | 121 | It is often useful to chain data transfers and kernel launches, so that you can asynchronously kick off all the aspects that need to run on the Epiphany (data transfers and kernel runs) at a single point, then go ahead and do other stuff on the host.
Critically, when you do this you need to be confident that the data transfer will complete before the kernel executes. The good news is that in ePython the order of launch is guaranteed to be the order in which operations are scheduled, such that if *A*, *B* and *C* are scheduled to run on a specific Epiphany core then we guarantee that they will execute in that order. 122 | 123 | ```python 124 | from epython import offload, define_on_device, copy_to_device, waitAll 125 | from random import random 126 | 127 | a=[0]*100 128 | 129 | define_on_device(a) 130 | 131 | for i in range(100): 132 | a[i]=random() 133 | 134 | @offload(async=True) 135 | def addAllA(): 136 | i=0 137 | value=0 138 | while i < size(a): 139 | value+=a[i] 140 | i+=1 141 | return value 142 | 143 | h1=copy_to_device("a", a, async=True, target=[1]) 144 | h2=addAllA(target=[0,1]) 145 | print waitAll(h1,h2) 146 | ``` 147 | 148 | The code snippet of this section illustrates this concept, where an asynchronous data copy is started on core 1 (returning the handle *h1*.) Then the *addAllA* kernel is launched on cores 0 and 1 asynchronously and we wait for both handles at line 22. Due to the ordering of launch, ePython guarantees that the *addAllA* kernel will only execute on core 1 once the data transfer has completed to that core. There is no data transfer to core 0, so the kernel will launch immediately and return 0. 149 | 150 | ### Summary 151 | 152 | In this tutorial we have looked at offloading parts of Python codes onto Epiphany cores in more detail. Carefully managing device resident data, often with asynchronous data copying, can provide significant performance benefits over simply copying all data on every kernel launch. For more information you can refer to the Jacobi offload example which uses device resident data. In this example you can see we launch the two kernels for every iteration which is quite slow (we are constantly launching kernels on the Epiphany cores rather than doing computation!)
As an exercise modify the code so that it moves the iteration loop into the kernel, so that kernels only need to launched once for the entire run. 153 | -------------------------------------------------------------------------------- /host/configuration.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "configuration.h" 33 | #ifndef HOST_STANDALONE 34 | #include "device-support.h" 35 | #else 36 | #define TOTAL_CORES 1 37 | #endif 38 | 39 | static void parseCommandLineArguments(struct interpreterconfiguration*, int, char**); 40 | static void parseCoreActiveInfo(struct interpreterconfiguration*, char*); 41 | static int areStringsEqualIgnoreCase(char*, char*); 42 | static void displayHelp(void); 43 | 44 | /** 45 | * Given the command line arguments this will read the configuration and return the configuration structure 46 | * which has the appropriate flags set and contains strings etc 47 | */ 48 | struct interpreterconfiguration* readConfiguration(int argc, char *argv[]) { 49 | int i; 50 | struct interpreterconfiguration* configuration=(struct interpreterconfiguration*) malloc(sizeof(struct interpreterconfiguration)); 51 | configuration->intentActive=(char*) malloc(TOTAL_CORES); 52 | for (i=0;iintentActive[i]=1; 53 | configuration->displayStats=configuration->displayTiming=configuration->forceCodeOnCore= 54 | configuration->forceCodeOnShared=configuration->forceDataOnShared=configuration->displayPPCode=0; 55 | configuration->filename=configuration->compiledByteFilename=configuration->loadByteFilename=configuration->pipedInContents=NULL; 56 | parseCommandLineArguments(configuration, argc, argv); 57 | return configuration; 58 | } 59 | 60 | /** 61 | * Parses command line arguments 62 | */ 63 | static void parseCommandLineArguments(struct interpreterconfiguration* configuration, int argc, char *argv[]) { 64 | if (argc == 1) { 65 | displayHelp(); 66 | exit(0); 67 | } else { 68 | #ifdef HOST_STANDALONE 69 | configuration->hostProcs=1; 70 | #else 71 | configuration->hostProcs=0; 72 | #endif 73 | configuration->coreProcs=0; 74 | configuration->loadElf=1; 75 | configuration->loadSrec=0; 76 | configuration->fullPythonHost=0; 77 | int i, coreplacement=0; 78 | for 
(i=1;idisplayStats=1; 81 | } else if (areStringsEqualIgnoreCase(argv[i], "-pp")) { 82 | configuration->displayPPCode=1; 83 | } else if (areStringsEqualIgnoreCase(argv[i], "-srec")) { 84 | configuration->loadElf=0; 85 | configuration->loadSrec=1; 86 | } else if (areStringsEqualIgnoreCase(argv[i], "-elf")) { 87 | configuration->loadElf=1; 88 | configuration->loadSrec=0; 89 | } else if (areStringsEqualIgnoreCase(argv[i], "-t")) { 90 | configuration->displayTiming=1; 91 | } else if (areStringsEqualIgnoreCase(argv[i], "-fullpython")) { 92 | configuration->fullPythonHost=1; 93 | configuration->hostProcs=1; 94 | } else if (areStringsEqualIgnoreCase(argv[i], "-datashared")) { 95 | configuration->forceDataOnShared=1; 96 | } else if (areStringsEqualIgnoreCase(argv[i], "-codecore")) { 97 | configuration->forceCodeOnCore=1; 98 | } else if (areStringsEqualIgnoreCase(argv[i], "-codeshared")) { 99 | configuration->forceCodeOnShared=1; 100 | } else if (areStringsEqualIgnoreCase(argv[i], "-o")) { 101 | if (i+1 ==argc) { 102 | fprintf(stderr, "When specifying to output compiled bytes then you must provide a filename for this\n"); 103 | exit(0); 104 | } else { 105 | configuration->compiledByteFilename=argv[++i]; 106 | } 107 | } else if (areStringsEqualIgnoreCase(argv[i], "-l")) { 108 | if (i+1 ==argc) { 109 | fprintf(stderr, "When specifying to load from a byte file then you must provide a filename for this\n"); 110 | exit(0); 111 | } else { 112 | configuration->loadByteFilename=argv[++i]; 113 | } 114 | } else if (areStringsEqualIgnoreCase(argv[i], "-help")) { 115 | displayHelp(); 116 | exit(0); 117 | } else if (areStringsEqualIgnoreCase(argv[i], "-h")) { 118 | if (i+1 ==argc) { 119 | fprintf(stderr, "You must provide a number of host processes to use\n"); 120 | exit(0); 121 | } else { 122 | if (coreplacement) { 123 | fprintf(stderr, "Can not specify explicit core placement and have host virtual processes\n"); 124 | exit(0); 125 | } 126 | configuration->hostProcs=atoi(argv[++i]); 127 
| if (configuration->fullPythonHost) configuration->hostProcs++; 128 | } 129 | } else if (areStringsEqualIgnoreCase(argv[i], "-d")) { 130 | if (i+1 ==argc) { 131 | fprintf(stderr, "You must provide a number of device processes to use\n"); 132 | exit(0); 133 | } else { 134 | int j, device_procs=atoi(argv[++i]); 135 | for (j=0;j<16;j++) { 136 | configuration->intentActive[j]=jpipedInContents=argv[++i]; 145 | } 146 | } else if (areStringsEqualIgnoreCase(argv[i], "-c")) { 147 | if (i+1 ==argc) { 148 | fprintf(stderr, "When specifying core placement you must provide arguments\n"); 149 | exit(0); 150 | } else { 151 | if (configuration->hostProcs > 0) { 152 | fprintf(stderr, "Can only specify explicit core placement with no host virtual processes\n"); 153 | exit(0); 154 | } 155 | coreplacement=1; 156 | parseCoreActiveInfo(configuration, argv[++i]); 157 | } 158 | } else { 159 | if (configuration->filename != NULL) { 160 | fprintf(stderr, "Only one filename can be provided, you have suppled '%s' and '%s'\n", configuration->filename, argv[i]); 161 | exit(0); 162 | } else { 163 | configuration->filename=argv[i]; 164 | } 165 | } 166 | } 167 | if (configuration->loadByteFilename == NULL && configuration->filename == NULL && configuration->pipedInContents == NULL) { 168 | fprintf(stderr, "You must supply a file to run as an argument, see -h for details\n"); 169 | exit(0); 170 | } 171 | #ifndef HOST_STANDALONE 172 | for (i=0;i<16;i++) if (configuration->intentActive[i]) configuration->coreProcs++; 173 | #endif 174 | } 175 | } 176 | 177 | /** 178 | * Determines the active cores if the user supplied -c n, can be a single integer, a list, a range or 179 | * all to select all cores 180 | */ 181 | static void parseCoreActiveInfo(struct interpreterconfiguration* configuration, char * info) { 182 | int i; 183 | if (areStringsEqualIgnoreCase(info, "all")) { 184 | for (i=0;i<16;i++) configuration->intentActive[i]=1; 185 | } else { 186 | if (strchr(info, ',') != NULL) { 187 | char vn[5]; 
188 | int s; 189 | for (i=0;i<16;i++) configuration->intentActive[i]=0; 190 | while (strchr(info, ',') != NULL) { 191 | s=strchr(info, ',')-info; 192 | memcpy(vn, info, s); 193 | vn[s]='\0'; 194 | configuration->intentActive[atoi(vn)]=1; 195 | info=strchr(info, ',')+1; 196 | } 197 | configuration->intentActive[atoi(info)]=1; 198 | } else if (strchr(info, ':') != NULL) { 199 | char vn[5]; 200 | int s; 201 | s=strchr(info, ':')-info; 202 | memcpy(vn, info, s); 203 | vn[s]='\0'; 204 | int from=atoi(vn); 205 | int to=atoi(strchr(info, ':')+1); 206 | for (i=0;i<16;i++) { 207 | if (i >= from && i<= to) { 208 | configuration->intentActive[i]=1; 209 | } else { 210 | configuration->intentActive[i]=0; 211 | } 212 | } 213 | } else { 214 | for (i=0;i<16;i++) configuration->intentActive[i]=0; 215 | configuration->intentActive[atoi(info)]=1; 216 | } 217 | } 218 | } 219 | 220 | /** 221 | * Displays the help message with usage information 222 | */ 223 | static void displayHelp() { 224 | printf("Epiphany Python version %s\n", VERSION_IDENT); 225 | printf("epython [arguments] filename\n\nWhere filename is the source code to execute by default on all cores\n\nArguments\n--------\n"); 226 | #ifndef HOST_STANDALONE 227 | printf("-c placement Specify core placement; can be a single id, all, a range (a:b) or a list (a,b,c,d)\n"); 228 | printf("-d processes Specify number of process on the device\n"); 229 | printf("-h processes Specify number of process on the host\n"); 230 | printf("-t Display core run timing information\n"); 231 | printf("-codecore Placement code on each core (default up to %d bytes length)\n", CORE_CODE_MAX_SIZE); 232 | printf("-codeshared Placement code in shared memory (automatic after %d bytes in length)\n", CORE_CODE_MAX_SIZE); 233 | printf("-datashared Data (arrays and strings) stored in shared memory, storage on core is default\n"); 234 | printf("-elf Use ELF device executable\n"); 235 | printf("-srec Use SREC device executable\n"); 236 | #endif 237 | printf("-s 
Display parse statistics\n"); 238 | printf("-pp Display preprocessed code\n"); 239 | printf("-o filename Write out the compiled byte representation of processed Python code and exits (does not run code)\n"); 240 | printf("-l filename Loads from compiled byte representation of code and runs this\n"); 241 | printf("-help Display this help and quit\n"); 242 | } 243 | 244 | /** 245 | * Tests two strings for equality, ignoring the case - this is for case insensitive variable name matching 246 | */ 247 | static int areStringsEqualIgnoreCase(char * s1, char * s2) { 248 | size_t s1_len=strlen(s1), s2_len=strlen(s2), i; 249 | if (s1_len != s2_len) return 0; 250 | for (i=0;i 83 | We are going to estimate the value of PI via the dartboard method, which is an example of a Monte Carlo method (https://en.wikipedia.org/wiki/Monte_Carlo_method.) Basically, imagine we have a dartboard mounted on a wooden backing and the dartboard fits perfectly within this wooden backing as per the diagram. 84 | 85 | If the radius of the dartboard is one, then the area of the board will be PI, as the dartboard fits snugly on the wooden backing then the area of the wood is 4 (2 by 2.) Therefore this means the ratio of the area of the circle to that of the wood is pi/4. If we throw lots of darts at the board then randomly some will land on the board and some on the wooden backing, but by probability the ratio of the number landing on the dartboard vs the number that is thrown will be pi/4. 86 | 87 | Each Epiphany core will simulate the throwing of lots of darts at this dartboard, and by tracking the number which land on the board across all cores we can estimate PI. The more darts which are thrown, the more accurate our approximation of PI. 
88 | 89 | ```python 90 | import parallel 91 | from random import random 92 | from math import pow 93 | 94 | darts=100 95 | rounds=0 96 | mypi=0.0 97 | 98 | if coreid()==0: 99 | rounds=input("Enter the number of rounds: ") 100 | rounds=bcast(rounds, 0) 101 | else: 102 | rounds=bcast(none, 0) 103 | i=1 104 | while i<=rounds: 105 | score=0.0 106 | j=1 107 | while j<=darts: 108 | x=random() 109 | y=random() 110 | 111 | if (pow(x,2) + pow(y,2) < 1.0): 112 | score=score+1 113 | j+=1 114 | mypi=mypi+4.0 * (score/darts) 115 | i+=1 116 | mypi=reduce(mypi, "sum") 117 | if coreid()==0: print "Value of PI="+(mypi/rounds)/numcores() 118 | ``` 119 | 120 | In this code each core works in rounds, throwing *darts* number of darts per round. Initially core 0 requests from the user the number of rounds to run (10 is a good starting number), which is then broadcast amongst the cores at lines 9 and 11. Remember the provided value to a broadcast collective is only relevant on the root core (in this case core 0) - you can see at line 9 that core 0 will broadcast the *rounds* value, which has been inputted by the user, and every other core at line 11 issues the broadcast call with the *none* value, which is Python's way of representing the absence of a value. What we call the computational kernel, the heart of what each core is actually doing, is at lines 13-24 which performs the Monte Carlo method and then at line 25 the values determined at each core are summed together and then displayed at line 26 by core 0. 121 | 122 | By increasing the number of rounds we increase the accuracy of the answer, but the cost is an increase in runtime. You can use the *-t* command line argument to display timing information for each core, for instance *epython -t pi.py*, run with 10, 50, 100 and 500 rounds and you will see the difference (be patient with 500 rounds it takes a few seconds!) 
123 | 124 | As a general note, we have two extremes when classifying parallelism; at one end tightly coupled problems where each core must very extensively communicate with other cores and at the other end embarrassingly parallel problems where very little (if any) communication is needed. Most HPC codes sit somewhere between these extremes and this example is towards the embarrassingly parallel side, because there are only 2 communications (the initial broadcast and final reduction) and importantly there are no communications required in the computational kernel, so each core can just get on with its computational task. Communications add overhead, so it is useful to understand where a parallel code sits on this scale to give an idea of likely performance and scalability. 125 | 126 | ### Summary 127 | 128 | In this tutorial we have used ePython to introduce some of the basic building blocks of parallelism and shown how quick and easy it is to write parallel codes on the Epiphany. The PI example that we looked at is a simple illustration of a Monte Carlo method, many codes running on the latest supercomputers are based around Monte Carlo methods and more generally the ideas of core identification, point to point & collective communications form the basis of the majority of HPC codes. 129 | -------------------------------------------------------------------------------- /docs/tutorial4.md: -------------------------------------------------------------------------------- 1 | # Pipelines on the Epiphany 2 | In the previous tutorial (available [here](tutorial3.md)) we looked at splitting a problem up geometrically. Driven by the decomposition of the data, different parts of the problem ran on different Epiphany cores with these cores often needing to communuicate when a neighbouring value held on another core was required. 
3 | 4 | Whilst geometric decomposition is a very common approach not all problems are suited to being split around the geometry of the data and instead in this tutorial we will look at splitting up a problem based upon the flow of data, known as a pipeline. 5 | 6 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 7 | 8 | ### Pipeline 9 | Data flows into the first stage, some processing is performed on it and a resulting value flows into the next stage, which performs processing and passes it on to the next stage, etc. Once data has been sent from one stage to the next then that stage is ready to receive some more data and start processing that. 10 | 11 | 12 | This is illustrated in the diagram, in a pipeline data only flows one way (here from left to right) and at each stage the data is refined, from its initial "raw" value to the final "finished" value. Ideally you want all stages in the pipeline to be busy, when your program starts it takes some time to fill up the pipeline and at the end the pipeline drains. The simplest approach to a pipeline will map a single stage to a single Epiphany core. 13 | 14 | ### ePython pipeline 15 | Now it's time for an example, based upon a large set of numbers we want to know the percentage of numbers that are contiguous, i.e. where the same numeric value lies one after another. This leads to a pipeline with four stages: 16 |
    17 |   1. Stage 1: Decide the number of data elements (chosen randomly) for that specific sequence.
18 |   2. Stage 2: Based upon the number of elements generate random numbers for each of these.
19 |   3. Stage 3: Sorts the number sequence.
20 |   4. Stage 4: Progresses through the sequence and counts the number of contiguous elements, the percentage of which is output.
21 |
22 | 23 | The input to the entire pipeline is the number of sequences to work on and the output of the pipeline is the percentage of contiguous numbers in that sequence. 24 | 25 | ```python 26 | import parallel 27 | import util 28 | from random import randrange 29 | 30 | data=[0]*510 31 | 32 | if (coreid()==0): 33 | pipelineStageOne(100) 34 | elif (coreid()==1): 35 | pipelineStageTwo() 36 | elif (coreid()==2): 37 | pipelineStageThree() 38 | elif (coreid()==3): 39 | pipelineStageFour() 40 | 41 | def pipelineStageOne(num_items): 42 | for i in range(num_items): 43 | num=randrange(500) + 5 44 | send(num, coreid()+1) 45 | send(-1,coreid()+1) 46 | 47 | def pipelineStageTwo(): 48 | num=0 49 | while num >= 0: 50 | num=recv(coreid()-1) 51 | if num > 0: 52 | i=0 53 | while i < num: 54 | data[i]=randrange(10) 55 | i+=1 56 | send(num, coreid()+1) 57 | if num > 0: send(data, coreid()+1, num) 58 | 59 | def pipelineStageThree(): 60 | num=0 61 | while num >=0: 62 | num=recv(coreid()-1) 63 | if num > 0: 64 | data=recv(coreid()-1, num) 65 | oddSort(data, num) 66 | send(num, coreid()+1) 67 | if num > 0: send(data, coreid()+1, num) 68 | 69 | def pipelineStageFour(): 70 | num=0 71 | num_contig=0.0 72 | total_num=0 73 | while num >=0: 74 | num=recv(coreid()-1) 75 | if num > 0: 76 | total_num+=num 77 | data=recv(coreid()-1, num) 78 | cnum=data[0] 79 | ccount=1 80 | i=0 81 | while i < num: 82 | if (data[i] == cnum): 83 | ccount+=1 84 | else: 85 | num_contig+=ccount 86 | cnum=data[i] 87 | ccount=0 88 | i+=1 89 | chance=(num_contig/total_num)*100 90 | print chance+"% of numbers were contiguous" 91 | ``` 92 | 93 | **This is an illustration of the code, the executable version is here** 94 | 95 | Based upon its core ID, a core will execute a specific pipeline stage function where it waits for data and, once it has received this, will process the data and send results onto the next stage. The *oddSort* function (in the util module) will perform an odd-even sort on the number sequence. 
At the end of the pipeline, stage one will send the value *-1* to stage two, which will then send it along to the next stage and quit. This action is repeated for the other stages and this is known as a sentinel or poison pill, which will shut the pipeline down and this is the common way in which one terminates parallel pipelines. 96 | 97 | So, we now have a pipeline which passes data between the stages and each stage operates on this data. However there is a problem, namely that the amount of work per pipeline stage is very uneven. For instance stage 1 will progress very quickly, whereas stage 3 (the sorting stage) will take much longer and fast stages will be held up by the slower stages. Bear in mind though, that we are only mapping one stage to one Epiphany core, so our current pipeline is only using 4 of the Epiphany cores. Hence we have 12 idle cores and how can we take advantage of these to help address our work imbalance problem and improve performance? 98 | 99 | ### Splitting the pipeline 100 | What we are going to do here is keep stage 1 unique (i.e. on core 0), but then duplicate stages 2, 3 and 4 across all the remaining cores. This is known as a non-linear pipeline and it will look like the diagram here: 101 | 102 | 103 | Importantly this approach keeps all the cores busy and we have further parallelised the problem by adopting this splitting. Not only will each of the four stages operate in parallel, but also multiple cores will be performing the exact same stage work. 104 | 105 | ```python 106 | .....
107 | if (coreid()==0): 108 | pipelineStageOne(100) 109 | else: 110 | if (coreid() % 3 == 1): 111 | pipelineStageTwo() 112 | elif (coreid() % 3 == 2): 113 | pipelineStageThree() 114 | else: 115 | pipelineStageFour() 116 | 117 | def pipelineStageOne(num_items): 118 | matchingpid=1 119 | for i in range(num_items): 120 | num=randrange(500) + 5 121 | send(num, matchingpid) 122 | matchingpid+=3 123 | if matchingpid > 13: matchingpid=1 124 | for i in range(1,13,3): 125 | send(-1,i) 126 | 127 | ..... 128 | ``` 129 | 130 | **This is an illustration of the code, the executable version is here** 131 | 132 | The code is very similar to the previous simple pipeline code, but stage 1 (on core 0) is maintaining a matching core ID, *matchingpid* which is sends to next. This value is increased at each stage and then wrapped around once *matchingpid* reaches over 13. 133 | 134 | Time both the simple and split versions (using the *-t* command line argument for timing information.) You should see quite a significant performance improvement by adopting this splitting approach and taking advantage of the idle cores. 135 | 136 | ### Parallelising a specific stage 137 | It is quite simple really, to improve performance we want to take advantage of the simple pipeline's idle cores. As we have seen one way is by splitting and duplicating stages. The other way is by keeping the stages exactly the same, but instead to parallelise one specific stage. In our example the sorting (stage 3) is the most expensive, so we can concentrate our idle cores onto this stage. 138 | 139 | 140 | 141 | This is illustrated in the diagram, where *Cn* represents the *nth* Epiphany core and you can see that there are 13 cores allocated to stage three. This can work very well when another pattern can easily be adopted within the stage and here we are going to use geometric decomposition, to split the data up amongst these 13 cores and do a parallel sort on it. 142 | 143 | ```python 144 | ..... 
145 | 146 | if (coreid()==0): 147 | pipelineStageOne(10) 148 | elif (coreid()==1): 149 | pipelineStageTwo() 150 | elif (coreid() >= 2 and coreid() <= 14): 151 | pipelineStageThree() 152 | elif (coreid()==15): 153 | pipelineStageFour() 154 | 155 | ..... 156 | 157 | def pipelineStageTwo(): 158 | num=0 159 | while num >= 0: 160 | num=recv(coreid()-1) 161 | j=2 162 | while j<=14: 163 | if num > 0: 164 | i=0 165 | while i < num/13: 166 | data[i]=randrange(1000) 167 | i+=1 168 | send(num/13, j) 169 | send(data, j, num/13) 170 | else: 171 | send(-1, j) 172 | j+=1 173 | 174 | def pipelineStageThree(): 175 | num=0 176 | while num >=0: 177 | num=recv(1) 178 | if num > 0: 179 | data=recv(1, num) 180 | parallel_odd_even_sort(num) 181 | send(num, 15) 182 | if num > 0: send(data, 15, num) 183 | 184 | def pipelineStageFour(): 185 | rdata=[0]*100 186 | num=0 187 | num_contig=0.0 188 | total_num=0 189 | while num >=0: 190 | i=2 191 | while i<=14: 192 | num=recv(i) 193 | if (num > 0): 194 | rdata=recv(i, num) 195 | j=num*i 196 | while j 0: 201 | ..... 202 | chance=(num_contig/total_num)*100 203 | print chance+"% of numbers were contiguous" 204 | ``` 205 | 206 | **This is an illustration of the code, the executable version is here** 207 | 208 | This approach is a bit more complex as, instead of filling in the entire number sequence and passing it along, stage two of the pipeline will complete each subsequence needed for the different cores of stage three and send the specific data to its specific core. The Epiphany cores allocated to stage three then receive their subdata, perform a parallel sort on it (via the *parallel_odd_even_sort* function) and send their values onto stage four which will collate and assemble them in order to perform the final calculation. 209 | 210 | ### Summary 211 | In this tutorial we have looked at pipelines where the parallelism is oriented around the flow of data. 
As it flows through the pipeline's stages, data is refined until we get a final value that is output from the final stage. This approach is suited to many problems, and some that you might not necessarily expect (such as CPU instruction pipelines.) Due to the fast interconnect between the Epiphany cores this approach of streaming data between them is potentially very advantageous - but as we have seen it is really important that each stage is equally busy at all times. If you have an uneven distribution of computation amongst the cores, or lots of idle cores, then splitting the pipeline or parallelising a specific stage can provide a significant gain. 212 | 213 | More information about pipelines can be found here. An example focussing on the ePython sequential odd-even sort algorithm that we used can be found here and the parallel version we used can be found here. 214 | -------------------------------------------------------------------------------- /docs/tutorial3.md: -------------------------------------------------------------------------------- 1 | # Geometric decomposition on the Epiphany 2 | In the previous tutorial (available [here](tutorial2.md)) we concentrated on different ways to pass messages between cores which is one of the core mechanisms of parallelism. We saw that messages can be point to point, where only two cores are involved, or collective where every core is involved. The forms of communication that you select depend upon the problem you are trying to solve, and the example we considered (finding PI via the dartboard method) fitted very well with collective communications. 3 | 4 | This tutorial will build upon tutorial two's mechanisms of parallelism in order to take a higher level view of parallel codes by considering some of the common strategies (also known as patterns) that are available to parallel programmers and widely used. This tutorial will concentrate on geometric decomposition, which is also known as domain decomposition.
5 | 6 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 7 | 8 | ### Geometric decomposition 9 | Splitting a problem up geometrically, and allocating different chunks of the data to different cores (or processes) is a very useful technique when there is one key data structure and the major organising principle of parallelism is splitting up of the data itself. In this strategy each core performs (roughly) the same instructions, just operating upon different data. 10 | 11 | 12 | The diagram illustrates geometric decomposition in more detail, where an initially large 2D array of data is split up into four chunks and each chunk is then distributed onto a different core. One of the key decisions for the parallel programmer is that of **granularity**, i.e. how many and how large these chunks should be. Granularity is very important because we want to maximise the amount of computation each core performs whilst minimising the communication between cores (which is an overhead of parallelism.) It is a trade-off, for instance in the diagram we only have four large chunks so only four cores can be utilised and these might have a very significant amount of computation to perform. At the other extreme, if we were to split the data into very many smaller chunks, then the cost of communication will likely dominate because each core only has a small amount of computation but very many cores results in lots of communications and cores are predominantly waiting for these communications to complete.
13 | 14 | ### Jacobi iterative method 15 | We are going to look at an algorithm very commonly used in HPC, namely an iterative method (the Jacobi method) to solve a partial differential equation (PDE.) We will be focussing on Laplace's equation for diffusion, and you can think of a long pipe where we know the value of some pollutant at each end but not throughout the pipe itself. Based upon this pipe and initial values we want to deduce how the pollution diffuses throughout. In order to solve this problem we split the pipe up into a number of distinct cells and set the values at the left most and right most cells (called the boundary conditions.) For every other cell, the value in that cell depends upon the values held in the neighbouring cells - which themselves depend upon their neighbours. The algorithm works in iterations, where each iteration will update all the unknown values and so progresses towards the final answer. At each iteration we calculate the residual which tells us how far away from the answer the current solution is and we will keep iterating until this residual is small enough to match a predetermined termination accuracy. 16 | 17 | ### Halo swapping 18 | In order to parallelise this problem we are going to split up the pipe geometrically and allocate different chunks to different Epiphany cores. I have already mentioned that the value at each cell depends upon its neighbouring cells, this is called the *stencil* and in this case we have a stencil size of one (we only care about the direct neighbour in each direction.) What this tells us is that the majority of our computation will be local, but the calculation for the first and last points in a chunk (held by a core) will require a non-local neighbour's value (held on a different core.)
19 | 20 | 21 | This is illustrated by the diagram, where the top image illustrates a pipe where we are solving 15 unknown pollution elements (empty boxes) and the left most and right most boundary condition (shaded) values are provided. This is then split up into three chunks in the lower illustration, each with 5 elements (empty boxes), and each chunk is allocated on a different core. It can be seen that for each chunk of data, there are actually seven elements - the five empty elements and one shaded on the left and one shaded on the right. These shaded elements are known as halos (or ghosts) and represent the neighbouring value required for the first and last local elements. A halo swap, where cores communicate neighbouring values, is performed en-mass at the start of each iteration and-so when it comes to the computation all the data a core requires is already present. Halo swapping results in fewer, larger messages (which is far more efficient than many smaller messages) and is a very common technique employed by HPC programmers. 22 | 23 | ### ePython code 24 | Now we have looked at some of the fundamental concepts underlying geometric decomposition and our example, it is time to get to the code! 
25 | 26 | ```python 27 | import parallel 28 | from math import sqrt 29 | 30 | DATA_SIZE=100 31 | MAX_ITS=100000 32 | 33 | # Work out the amount of data to hold on this core 34 | local_size=DATA_SIZE/numcores() 35 | if local_size * numcores() != DATA_SIZE: 36 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 37 | 38 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 39 | data=[0]*(local_size+2) 40 | data_p1=[0]*(local_size+2) 41 | 42 | # Set the initial conditions 43 | i=0 44 | while i<=local_size+1: 45 | data[i]=0.0 46 | i+=1 47 | 48 | if coreid()==0: data[0]=1.0 49 | if coreid()==numcores()-1: data[local_size+1]=10.0 50 | 51 | # Compute the initial absolute residual 52 | tmpnorm=0.0 53 | i=1 54 | while i<=local_size: 55 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 56 | i+=1 57 | tmpnorm=reduce(tmpnorm, "sum") 58 | bnorm=sqrt(tmpnorm) 59 | 60 | norm=1.0 61 | its=0 62 | while norm >= 1e-4 and its < MAX_ITS: 63 | # Halo swap to my left and right neighbours if I have them 64 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 65 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], coreid()+1) 66 | 67 | # Calculate current residual 68 | tmpnorm=0.0 69 | i=1 70 | while i<=local_size: 71 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 72 | i+=1 73 | tmpnorm=reduce(tmpnorm, "sum") 74 | norm=sqrt(tmpnorm)/bnorm 75 | 76 | if coreid()==0 and its%1000 == 0: print "RNorm is "+norm+" at "+its+" iterations" 77 | 78 | # Performs the Jacobi iteration for Laplace 79 | i=1 80 | while i<=local_size: 81 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 82 | i+=1 83 | 84 | # Swap local data around for next iteration 85 | i=1 86 | while i<=local_size: 87 | data[i]=data_p1[i] 88 | i+=1 89 | its+=1 90 | 91 | if coreid()==0: print "Completed in "+its+" iterations, RNorm="+norm 92 | ``` 93 | 94 | Copy this into a file named *jacobi.py* and execute *epython jacobi.py* (it is also 
provided in *examples/jacobi.py*), this will execute over all 16 Epiphany cores and you will see something like: 95 | 96 | ``` 97 | [device 0] RNorm is 1.000000 at 0 iterations 98 | [device 0] RNorm is 0.004219 at 1000 iterations 99 | [device 0] RNorm is 0.002365 at 2000 iterations 100 | [device 0] RNorm is 0.001449 at 3000 iterations 101 | [device 0] RNorm is 0.000893 at 4000 iterations 102 | [device 0] RNorm is 0.000552 at 5000 iterations 103 | [device 0] RNorm is 0.000341 at 6000 iterations 104 | [device 0] RNorm is 0.000209 at 7000 iterations 105 | [device 0] RNorm is 0.000129 at 8000 iterations 106 | [device 0] Completed in 8500 iterations, RNorm=0.000100 107 | ``` 108 | 109 | At the top of the code the *DATA_SIZE* variable sets the global length of the pipe (100 in this case) and initially the cores will split up the pipe and determine how much data they hold locally (in *local_size*) before allocating the arrays *data* and *data_p1* to hold their local data. You can see that each core actually allocates *local_size+2* data elements, we have 2 extra elements for the left and right halos. 110 | 111 | The initial absolute residual is then calculated which deduces how far away from the final answer the initial setup lies, each core calculates the local residual and than the *reduce* collective communication call is used to sum these up to a global value. 112 | 113 | At line 35 each core will begin iterating and directly after this the halo swap is performed via the *sendrecv* communication calls which combine both sending to and receiving from a core into one operation. The residual (how far the solution is from the final answer) is calculated for each iteration (again using the *reduce* collective) and this is then taken relative to the initial residual to determine how far the solution has progressed which is one of our termination criteria. 114 | 115 | This Jacobi method, whilst it is the slowest iterative solver, has some nice properties. 
One such property is that, given a fixed global problem size, irrespective of the number of cores you run with the progression towards the final answer at each iteration should be the same. You can display timing information via the *-t* command line argument, time a run with all 16 Epiphany cores and then run it only using 3 (*-d 3* command line argument) cores, the runtime will increase because we have fewer cores doing more work. 116 | 117 | No surprises so far, but remember at the start of the tutorial we discussed the granularity of the decomposition (fewer larger data chunks or many smaller chunks.) With this default pipe length of 100, 3 chunks is too few but actually 16 chunks is too many. Fixing the global problem size and varying the number of cores is an example of **strong scaling**, and running with about 8 Epiphany cores is the optimum. Smaller core counts are slower because computations rule and larger core counts are slower because communication costs rule. Many people assume that simply throwing cores at a problem will speed it up, but as we have seen that is certainly not the case and often beyond a specific optimum (8 cores here), increasing the number of cores will actually slow down your code run.
120 | 121 | ### Summary 122 | In this tutorial we have looked at geometric decomposition (also known as domain decomposition) which is a very common strategy for parallelism when your code is oriented around some key data structure(s) which can easily be split up. Iterative methods are very commonly used on supercomputers for solving systems of linear equations (such as Laplace's PDE here) and Jacobi is one of these methods. Iterative methods, as well as many other computational algorithms, lend themselves towards geometric decomposition and this way of splitting the problem up feels very natural in these cases. 123 | 124 | More information about Geometric decomposition can be found [here](http://parlab.eecs.berkeley.edu/wiki/patterns/geometric_decomposition) and more information about iterative methods can be found [here](http://www.maa.org/press/periodicals/loci/joma/iterative-methods-for-solving-iaxi-ibi-introduction-to-the-iterative-methods) 125 | -------------------------------------------------------------------------------- /host/memorymanager.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "memorymanager.h" 32 | 33 | // This is set at the end of parsing to be the entire byte code representation of the users Python program 34 | struct memorycontainer* assembledMemory=NULL; 35 | // This is the function list 36 | struct functionListNode* functionListHead=NULL; 37 | // Exportable view of the functions and their location in the byte code 38 | struct exportableFunctionTableNode* exportableFunctionTable=NULL; 39 | int numberExportableFunctionsInTable=0; 40 | 41 | struct function_call_tree_node mainCodeCallTree; 42 | 43 | static void determineUsedFunctions(void); 44 | static void processUsedFunction(struct functionDefinition*); 45 | static unsigned short findLocationOfLineNumber(struct lineDefinition*, int); 46 | static unsigned short findLocationOfFunctionName(struct lineDefinition*, char*, int, int); 47 | static struct functionDefinition* findFunctionDefinition(char*); 48 | static int doesFunctionAlreadyExistInExportableTable(char*); 49 | 50 | int getNumberOfSymbolEntriesNotUsed(void) { 51 | int ignoreSymbolEntries=0; 52 | struct functionListNode * 
fnHead=functionListHead; 53 | while (fnHead != NULL) { 54 | if (!fnHead->fn->called) ignoreSymbolEntries+=fnHead->fn->numberEntriesInSymbolTable; 55 | fnHead=fnHead->next; 56 | } 57 | return ignoreSymbolEntries; 58 | } 59 | 60 | /** 61 | * Compiles the memory by going through and resolving relative links (i.e. gotos) and adds a stop at the end 62 | */ 63 | void compileMemory(struct memorycontainer* memory) { 64 | int i; 65 | determineUsedFunctions(); 66 | struct memorycontainer* stopStatement=appendStopStatement(); 67 | if (memory != NULL) { 68 | struct memorycontainer* compiledMem=concatenateMemory(memory, stopStatement); 69 | struct functionListNode * fnHead=functionListHead; 70 | while (fnHead != NULL) { 71 | if (fnHead->fn->called) compiledMem=concatenateMemory(compiledMem, fnHead->fn->contents); 72 | if (fnHead->fn->functionCalls != NULL) { 73 | for (i=0;ifn->number_of_fn_calls;i++) { 74 | free(fnHead->fn->functionCalls[i]); 75 | } 76 | free(fnHead->fn->functionCalls); 77 | } 78 | fnHead=fnHead->next; 79 | } 80 | struct lineDefinition * root=compiledMem->lineDefns, *r2; 81 | while (root != NULL) { 82 | if (root->type==1) { 83 | unsigned short lineLocation=findLocationOfLineNumber(compiledMem->lineDefns, root->linenumber); 84 | memcpy(&compiledMem->data[root->currentpoint], &lineLocation, sizeof(unsigned short)); 85 | } else if (root->type==3 || root->type==4 || root->type==2) { 86 | unsigned short lineLocation=findLocationOfFunctionName(compiledMem->lineDefns, root->name, root->linenumber, root->type==4); 87 | if (root->type==3 || root->type==4) { 88 | memcpy(&compiledMem->data[root->currentpoint], &lineLocation, sizeof(unsigned short)); 89 | } 90 | if (!doesFunctionAlreadyExistInExportableTable(root->name)) { 91 | struct exportableFunctionTableNode* newExportableNode=(struct exportableFunctionTableNode*) malloc(sizeof(struct exportableFunctionTableNode)); 92 | newExportableNode->functionLocation=lineLocation; 93 | newExportableNode->functionName=(char*) 
malloc(strlen(root->name)+1); 94 | strcpy(newExportableNode->functionName, root->name); 95 | newExportableNode->next=exportableFunctionTable; 96 | exportableFunctionTable=newExportableNode; 97 | numberExportableFunctionsInTable++; 98 | } 99 | } 100 | root=root->next; 101 | } 102 | // Clear up the memory used for these line definition nodes 103 | root=compiledMem->lineDefns; 104 | while (root != NULL) { 105 | r2=root->next; 106 | free(root); 107 | root=r2; 108 | } 109 | assembledMemory=compiledMem; 110 | } else { 111 | assembledMemory=stopStatement; 112 | } 113 | } 114 | 115 | /** 116 | * Determines whether a specific function of a specific name already exists in the exportable global function table 117 | */ 118 | static int doesFunctionAlreadyExistInExportableTable(char* functionName) { 119 | struct exportableFunctionTableNode* root=exportableFunctionTable; 120 | while (root != NULL) { 121 | if (strcmp(root->functionName, functionName) == 0) return 1; 122 | root=root->next; 123 | } 124 | return 0; 125 | } 126 | 127 | /** 128 | * Determines the used (i.e. called by the code) functions, driven from the main function 129 | */ 130 | static void determineUsedFunctions(void) { 131 | int i; 132 | for (i=0;icalled) processUsedFunction(defn); 136 | } 137 | } 138 | } 139 | 140 | /** 141 | * Marks the current function as used (i.e. 
called from code), if it has not already been processed will then 142 | * go and examine all the called functions from this 143 | */ 144 | static void processUsedFunction(struct functionDefinition* specificFunction) { 145 | specificFunction->called=1; 146 | if (specificFunction->functionCalls != NULL) { 147 | int i; 148 | for (i=0;inumber_of_fn_calls;i++) { 149 | struct functionDefinition* defn=findFunctionDefinition(specificFunction->functionCalls[i]); 150 | if (defn != NULL) { 151 | if (!defn->called) processUsedFunction(defn); 152 | } 153 | } 154 | } 155 | } 156 | 157 | /** 158 | * Adds a function to the function list which are all combined in the compile memory function 159 | */ 160 | void addFunction(struct functionDefinition* functionDefintion) { 161 | struct functionListNode * node=(struct functionListNode*) malloc(sizeof(struct functionListNode)); 162 | node->fn=functionDefintion; 163 | node->next=functionListHead; 164 | functionListHead=node; 165 | } 166 | 167 | static struct functionDefinition* findFunctionDefinition(char * functionName) { 168 | struct functionListNode * node=functionListHead; 169 | while (node != NULL) { 170 | if (strcmp(node->fn->name, functionName) == 0) return node->fn; 171 | node=node->next; 172 | } 173 | return NULL; 174 | } 175 | 176 | int getNumberSymbolTableEntriesForRecursion(void) { 177 | int r=0; 178 | struct functionListNode * fnHead=functionListHead; 179 | while (fnHead != NULL) { 180 | if (fnHead->fn->recursive && fnHead->fn->called) r+=fnHead->fn->numberEntriesInSymbolTable; 181 | fnHead=fnHead->next; 182 | } 183 | return r; 184 | } 185 | 186 | /** 187 | * Given a line number will return the byte location of this in the memory 188 | */ 189 | static unsigned short findLocationOfLineNumber(struct lineDefinition * root, int lineNumber) { 190 | while (root != NULL) { 191 | if (root->type==0 && root->linenumber == lineNumber) return (unsigned short) root->currentpoint; 192 | root=root->next; 193 | } 194 | fprintf(stderr, "Can 
not find line %d in goto\n", lineNumber); 195 | exit(0); 196 | } 197 | 198 | /** 199 | * Finds the location of a function name and returns this or raises an error if the function is not found 200 | */ 201 | static unsigned short findLocationOfFunctionName(struct lineDefinition * root, char * functionName, int line_num_for_error, int isvarorfn) { 202 | while (root != NULL) { 203 | if (root->type==2 && strcmp(root->name, functionName) == 0) return (unsigned short) root->currentpoint; 204 | root=root->next; 205 | } 206 | if (isvarorfn) { 207 | fprintf(stderr, "Can not find variable or function '%s' in assignment at line number %d\n", functionName, line_num_for_error); 208 | } else { 209 | fprintf(stderr, "Can not find function '%s' in function call at line number %d\n", functionName, line_num_for_error); 210 | } 211 | exit(0); 212 | } 213 | 214 | /** 215 | * Concatenates two memory structures together and returns the result of this 216 | */ 217 | struct memorycontainer* concatenateMemory(struct memorycontainer* m1, struct memorycontainer* m2) { 218 | if (m1 == NULL) return m2; 219 | if (m2 == NULL) return m1; 220 | struct memorycontainer* memoryContainer = (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 221 | memoryContainer->length=m1->length + m2->length; 222 | memoryContainer->data=malloc(memoryContainer->length); 223 | memoryContainer->lineDefns=m1->lineDefns; 224 | if (m1->data != NULL && m1->length > 0) memcpy(memoryContainer->data, m1->data, m1->length); 225 | if (m2->data != NULL && m2->length > 0) memcpy(&memoryContainer->data[m1->length], m2->data, m2->length); 226 | struct lineDefinition * root=m2->lineDefns, *r2; 227 | while (root != NULL) { 228 | root->currentpoint+=m1->length; 229 | r2=root->next; 230 | root->next=memoryContainer->lineDefns; 231 | memoryContainer->lineDefns=root; 232 | root=r2; 233 | } 234 | // Free up the m1 and m2 memory 235 | free(m1->data); 236 | free(m1); 237 | free(m2->data); 238 | free(m2); 239 | return 
memoryContainer; 240 | } 241 | 242 | struct memorycontainer* cloneMemory(struct memorycontainer* m1) { 243 | struct memorycontainer* memoryContainer = (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 244 | memoryContainer->length=m1->length; 245 | memoryContainer->data=malloc(memoryContainer->length); 246 | memoryContainer->lineDefns=m1->lineDefns; 247 | if (m1->data != NULL && m1->length > 0) memcpy(memoryContainer->data, m1->data, m1->length); 248 | return memoryContainer; 249 | } 250 | 251 | /** 252 | * Appends a statement to some memory and returns the new current location (for next entry) 253 | */ 254 | unsigned int appendStatement(struct memorycontainer* memory, unsigned char command, unsigned int position) { 255 | memcpy(&memory->data[position], &command, sizeof(unsigned char)); 256 | position+=sizeof(unsigned char); 257 | return position; 258 | } 259 | 260 | /** 261 | * Appends a variable to some memory and returns the new current location (for next entry) 262 | */ 263 | unsigned int appendVariable(struct memorycontainer* memory, unsigned short variableid, unsigned int position) { 264 | memcpy(&memory->data[position], &variableid, sizeof(short)); 265 | position+=sizeof(short); 266 | return position; 267 | } 268 | 269 | /** 270 | * Appends some memory to some other existing memory at a specific location 271 | */ 272 | unsigned int appendMemory(struct memorycontainer* memory, struct memorycontainer* statement, unsigned int position) { 273 | memcpy(&memory->data[position], statement->data, statement->length); 274 | 275 | struct lineDefinition * root=statement->lineDefns, *r2; 276 | while (root != NULL) { 277 | root->currentpoint+=position; 278 | r2=root->next; 279 | root->next=memory->lineDefns; 280 | memory->lineDefns=root; 281 | root=r2; 282 | } 283 | position+=statement->length; 284 | 285 | // Free up the statement memory 286 | free(statement->data); 287 | free(statement); 288 | return position; 289 | } 290 | 291 | /** 292 | * Gets the 
length of the assembled memory 293 | */ 294 | unsigned int getMemoryFilledSize() { 295 | if (assembledMemory == NULL) return 0; 296 | return assembledMemory->length; 297 | } 298 | 299 | /** 300 | * Sets the length of the assembled memory (when loading from bytecode file) 301 | */ 302 | void setMemoryFilledSize(unsigned int size) { 303 | if (assembledMemory == NULL) assembledMemory= (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 304 | assembledMemory->length=size; 305 | } 306 | 307 | /** 308 | * Gets the bytecode in the assembled memory 309 | */ 310 | char * getAssembledCode() { 311 | if (assembledMemory == NULL) return NULL; 312 | return assembledMemory->data; 313 | } 314 | 315 | /** 316 | * Sets the code in the assembled memory (when loading from bytecode file) 317 | */ 318 | void setAssembledCode(char * a) { 319 | if (assembledMemory == NULL) assembledMemory= (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 320 | assembledMemory->data=a; 321 | } 322 | --------------------------------------------------------------------------------