├── docs ├── pipeline.jpg ├── masterworker.png ├── decomposition.jpg ├── split_pipeline.jpg ├── parallel_pipeline.jpg ├── SciPy-20-landscape-v1d6.pdf ├── installupgrade.md ├── tutorial1.md ├── tutorial5.md ├── tutorial6.md ├── tutorial7.md ├── tutorial2.md ├── tutorial4.md └── tutorial3.md ├── modules ├── memory.py ├── random.py ├── concurrency.py ├── coprocessor.py ├── util.py ├── math.py ├── parallel.py ├── array.py └── taskfarm.py ├── examples ├── simpleplus.py ├── hello.py ├── p2pcomm.py ├── input.py ├── coreidentity.py ├── broadcast.py ├── reduction.py ├── loops.py ├── functions.py ├── arrays.py ├── synccores.py ├── pi_offload.py ├── controlflow.py ├── mandlebrot.py ├── odd-even-sort.py ├── task_farm_example.py ├── task_farm_pi.py ├── pi.py ├── haloswap.py ├── jacobi_offload.py ├── pipeline.py ├── parallel-odd-even-sort.py ├── split_pipeline.py ├── jacobi.py ├── gauss-seidel.py ├── mergesort.py └── parallel_pipeline.py ├── .gitignore ├── device ├── makefile ├── main.h └── main.c ├── epython.sh ├── host ├── host-functions.h ├── makefile ├── stack.h ├── python_interoperability.h ├── misc.h ├── configuration.h ├── device-support.h ├── memorymanager.h ├── stack.c ├── misc.c ├── parser.h ├── epython.l ├── byteassembler.h ├── epython.y ├── configuration.c └── memorymanager.c ├── LICENCE ├── makefile ├── shared.h ├── interpreter ├── functions.h ├── interpreter.h └── basictokens.h └── README.md /docs/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/pipeline.jpg -------------------------------------------------------------------------------- /docs/masterworker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/masterworker.png -------------------------------------------------------------------------------- /docs/decomposition.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/decomposition.jpg -------------------------------------------------------------------------------- /docs/split_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/split_pipeline.jpg -------------------------------------------------------------------------------- /docs/parallel_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/parallel_pipeline.jpg -------------------------------------------------------------------------------- /modules/memory.py: -------------------------------------------------------------------------------- 1 | def free(a): 2 | native rtl_free(a) 3 | 4 | def gc(): 5 | native rtl_gc() 6 | -------------------------------------------------------------------------------- /docs/SciPy-20-landscape-v1d6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mesham/epython/HEAD/docs/SciPy-20-landscape-v1d6.pdf -------------------------------------------------------------------------------- /examples/simpleplus.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple example of integer and real addition 3 | To run: epython simpleplus.py 4 | */ 5 | 6 | a=12 7 | b=2.3 8 | print a 9 | a=a+b 10 | print b 11 | -------------------------------------------------------------------------------- /examples/hello.py: -------------------------------------------------------------------------------- 1 | /* 2 | A simple hello world with string assignment and concatenation 3 | To run: epython hello.py 4 | */ 5 | 6 | a="hello" 7 | b="world" 8 | c=a+" "+b 9 | print c 10 | 
-------------------------------------------------------------------------------- /modules/random.py: -------------------------------------------------------------------------------- 1 | def randint(a, b): 2 | return (native rtl_math(14) % (b-a)) + a 3 | 4 | def randrange(a): 5 | return native rtl_math(14) % a 6 | 7 | def random(): 8 | return native rtl_math(14) % 20000 / 20001.0 9 | -------------------------------------------------------------------------------- /modules/concurrency.py: -------------------------------------------------------------------------------- 1 | def expose(data, pid): 2 | global_ref=native rtl_global_reference(id(data)) 3 | native rtl_send(global_ref,pid) 4 | 5 | def access(src): 6 | ref=native rtl_recv(src) 7 | return native rtl_dereference(ref) -------------------------------------------------------------------------------- /examples/p2pcomm.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of P2P blocking send and receives from for 0 to core 1. 
3 | To run: epython p2pcomm.py 4 | */ 5 | 6 | from parallel import * 7 | 8 | if coreid()==0: 9 | send(20, 1) 10 | elif coreid()==1: 11 | print "Got value "+str(recv(0))+" from core 0" 12 | -------------------------------------------------------------------------------- /examples/input.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of input, note that there will be an input for each core (and the value input for that core reported) so you might want to run with only 1 or 2 cores 3 | To run: epython -c 0 input (to run on core 0 only) 4 | */ 5 | 6 | a=input("Enter your name: ") 7 | print "Hello "+a 8 | -------------------------------------------------------------------------------- /examples/coreidentity.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of printing, core id and conditional statements 3 | To run: epython coreidentity.py 4 | */ 5 | 6 | from parallel import * 7 | 8 | print "Hello world from core "+str(coreid()) 9 | 10 | if coreid()==5: 11 | print "Hello only from core "+str(coreid()) 12 | 13 | -------------------------------------------------------------------------------- /examples/broadcast.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of broadcasting a value from one core (0 here) to each other and displaying the result 3 | To run: epython broadcast.py 4 | */ 5 | 6 | from parallel import * 7 | from random import randrange 8 | 9 | a=bcast(randrange(100), 0) 10 | print "The random number from core 0 is "+str(a) 11 | -------------------------------------------------------------------------------- /examples/reduction.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of global reduction across all cores, here we find the maximum random number - can also do min, sum and prod 3 | To run: epython reduction.py 4 | */ 5 | 
6 | from parallel import * 7 | from random import randrange 8 | 9 | a=reduce(randrange(100), "max") 10 | print "The highest random number is "+str(a) 11 | -------------------------------------------------------------------------------- /examples/loops.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of loops 3 | To run: epython loops.py 4 | */ 5 | 6 | from util import * 7 | 8 | for x in range(10): 9 | print "X="+str(x) 10 | 11 | for x in range(105,115): 12 | print "X="+str(x) 13 | 14 | list=[10,20,30,40,50,60] 15 | for x in list: 16 | print "List item="+str(x) 17 | 18 | i=10 19 | while i<=20: 20 | print "I="+str(i) 21 | i+=1 22 | -------------------------------------------------------------------------------- /examples/functions.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of defining functions, note how in anotherfn we provide default values incase the user does not specify them 3 | To run: epython functions.py 4 | */ 5 | 6 | def fn(a,b): 7 | print a+b 8 | 9 | def anotherfn(a=10, b=20): 10 | return a+b 11 | 12 | 13 | fn(1,2) 14 | fn("hello ", "world") 15 | 16 | print anotherfn() 17 | print anotherfn(2) 18 | print anotherfn(2,4) -------------------------------------------------------------------------------- /examples/arrays.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustration of arrays, getting a random number and printing values out. 
Dim will pop the array in core local memory, you can specify shared memory via sdim 3 | To run: epython arrays.py 4 | */ 5 | 6 | from random import randrange 7 | 8 | a=[0]*100 9 | i=0 10 | while i<100: 11 | a[i]=i 12 | i+=1 13 | r=randrange(100) 14 | print "Random index is "+str(r)+" value is "+str(a[r]) 15 | -------------------------------------------------------------------------------- /examples/synccores.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustrates the synchronisation across all cores, all cores display the hello message,wait and then the after message once othe cores have 3 | caught up. Comment out the sync line and rerun to see the messages more interleaved 4 | To run: epython synccores.py 5 | */ 6 | from parallel import * 7 | 8 | print "Hello from core "+str(coreid()) 9 | sync() 10 | print "After sync from core "+str(coreid()) 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.d 4 | *.ko 5 | *.obj 6 | *.elf 7 | *.cbp 8 | *.layout 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Libraries 15 | *.lib 16 | *.a 17 | *.la 18 | *.lo 19 | 20 | # Shared objects (inc. 
Windows DLLs) 21 | *.dll 22 | *.so 23 | *.so.* 24 | *.dylib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | *.i*86 31 | *.x86_64 32 | *.hex 33 | 34 | # Eclipse 35 | .project 36 | .cproject 37 | 38 | /Debug/ 39 | -------------------------------------------------------------------------------- /examples/pi_offload.py: -------------------------------------------------------------------------------- 1 | from epython import offload 2 | 3 | @offload 4 | def findPI(darts, rounds): 5 | from random import random 6 | from math import pow 7 | 8 | mypi=0.0 9 | i=1 10 | while i<=rounds: 11 | score=0.0 12 | j=1 13 | while j<=darts: 14 | x=random() 15 | y=random() 16 | if (pow(x,2) + pow(y,2) < 1.0): 17 | score+=1 18 | j+=1 19 | mypi=mypi+4.0 * (score/darts) 20 | i+=1 21 | return mypi 22 | 23 | pi=sum(findPI(100,10)) 24 | print "Value of PI is "+str((pi/10)/16) 25 | -------------------------------------------------------------------------------- /examples/controlflow.py: -------------------------------------------------------------------------------- 1 | /* 2 | Illustrates control flow, you can limit the cores run with the -c argument 3 | i.e. epython controlflow.py will run on all cores 4 | i.e. epython -c 1 controlflow.py will run on core 1 only 5 | i.e. epython -c 1,2,3,4,9 controlflow.py will run on cores 1,2,3,4 and 9 6 | i.e. 
epython -c 1:7 controlflow.py will run on cores 1 to 7 inclusive 7 | */ 8 | 9 | 10 | from parallel import * 11 | 12 | if coreid()==0 or coreid()==1: 13 | print "Core id is 0 or 1" 14 | elif coreid()==2: 15 | print "Core id is 2" 16 | else: 17 | print "Core id is not 0, 1 or 2" 18 | -------------------------------------------------------------------------------- /examples/mandlebrot.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple mandlbrot example, based on a version by Mike Bell 3 | */ 4 | 5 | from parallel import * 6 | from util import * 7 | 8 | outstr="" 9 | 10 | x=-2.0 11 | y=-1.0 + (coreid()*0.125) 12 | 13 | i=0 14 | while i<=66: 15 | re = x 16 | im = y 17 | j=0 18 | while j<=20: 19 | re_next = re*re - im*im + x 20 | im = 2*re*im + y 21 | re=re_next 22 | j+=1 23 | if re*re + im*im < 4: 24 | outstr = outstr+"#" 25 | else: 26 | outstr = outstr+" " 27 | x+=0.05 28 | i+=1 29 | 30 | for i in range(16): 31 | if coreid()==i: 32 | print outstr 33 | sync() 34 | 35 | -------------------------------------------------------------------------------- /examples/odd-even-sort.py: -------------------------------------------------------------------------------- 1 | /* 2 | Simple odd-even sort 3 | */ 4 | 5 | from util import * 6 | from random import randrange 7 | from array import len, array 8 | 9 | x=array(100) 10 | for i in range(99): 11 | x[i]=randrange(100) 12 | 13 | sorted=false 14 | while not sorted: 15 | sorted=true 16 | i=0 17 | while i x[i+1]: 19 | temp= x[i] 20 | x[i]=x[i+1] 21 | x[i+1] = temp 22 | sorted=false 23 | i+=2 24 | i=1 25 | while i x[i+1]: 27 | temp= x[i] 28 | x[i]=x[i+1] 29 | x[i+1] = temp 30 | sorted=false 31 | i+=2 32 | 33 | for i in x: 34 | print i -------------------------------------------------------------------------------- /device/makefile: -------------------------------------------------------------------------------- 1 | CC=e-gcc 2 | CFLAGS=-I ../ -I ../interpreter -Os -fno-exceptions 
-freg-struct-return -fno-default-inline 3 | LDFLAGS=-T linker.ldf -Wl,--gc-sections 4 | 5 | all: clean epython-device.elf 6 | epython-device.elf: main.o device-functions.o ../interpreter/interpreter.o 7 | bins = epython-device.elf 8 | 9 | .PHONE: check 10 | 11 | %.o : %.c 12 | $(CC) $(CFLAGS) -MMD -o $@ -c $< 13 | 14 | $(bins) : 15 | $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) -le-lib 16 | e-objcopy --srec-forceS3 --output-target srec epython-device.elf epython-device.srec 17 | 18 | clean: 19 | $(RM) -f -v $(bins) *.yy.[ch] *.tab.[ch] *.o *.d *.output ../interpreter/*.o 20 | 21 | -include *.d 22 | -------------------------------------------------------------------------------- /examples/task_farm_example.py: -------------------------------------------------------------------------------- 1 | import taskfarm 2 | 3 | initTaskFarm(0) 4 | 5 | if (coreid() == 0): 6 | execFunction(1, exampleFn, 35) 7 | arr=[0]*10 8 | i=0 9 | while i x[i+1]: 37 | temp= x[i] 38 | x[i]=x[i+1] 39 | x[i+1] = temp 40 | sorted=false 41 | i+=2 42 | i=1 43 | while i x[i+1]: 45 | temp= x[i] 46 | x[i]=x[i+1] 47 | x[i+1] = temp 48 | sorted=false 49 | i+=2 50 | -------------------------------------------------------------------------------- /modules/math.py: -------------------------------------------------------------------------------- 1 | def pow(a,b): 2 | return a ^ b 3 | 4 | def pi(): 5 | return 3.141592 6 | 7 | def e(): 8 | return 2.718281 9 | 10 | def exp(x): 11 | return pow(e(), x) 12 | 13 | def sqrt(a): 14 | return native rtl_math(0, a) 15 | 16 | def sin(a): 17 | return native rtl_math(1, a) 18 | 19 | def cos(a): 20 | return native rtl_math(2, a) 21 | 22 | def tan(a): 23 | return native rtl_math(3, a) 24 | 25 | def asin(a): 26 | return native rtl_math(4, a) 27 | 28 | def acos(a): 29 | return native rtl_math(5, a) 30 | 31 | def atan(a): 32 | return native rtl_math(6, a) 33 | 34 | def sinh(a): 35 | return native rtl_math(7, a) 36 | 37 | def cosh(a): 38 | return native rtl_math(8, a) 39 | 40 | def 
tanh(a): 41 | return native rtl_math(9, a) 42 | 43 | def floor(a): 44 | return native rtl_math(10, a) 45 | 46 | def ceil(a): 47 | return native rtl_math(11, a) 48 | 49 | def log(a): 50 | return native rtl_math(12, a) 51 | 52 | def log10(a): 53 | return native rtl_math(13, a) 54 | -------------------------------------------------------------------------------- /host/makefile: -------------------------------------------------------------------------------- 1 | CFLAGS := -O3 -DHOST_INTERPRETER -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -std=c99 -I ../interpreter 2 | OBJECTS := lexer.o parser.o main.o memorymanager.o byteassembler.o stack.o misc.o configuration.o ../interpreter/interpreter.o host-functions.o python_interoperability.o 3 | 4 | LIBS=-lm -lpthread 5 | 6 | ifeq ($(STANDALONE),1) 7 | CFLAGS+= -DHOST_STANDALONE 8 | else 9 | CFLAGS+= -I../ -I ${EPIPHANY_HOME}/tools/host/include -D__HOST__ -Dasm=__asm__ -Drestrict= 10 | OBJECTS+=device-support.o 11 | CC=arm-linux-gnueabihf-gcc 12 | LDFLAGS=-L ${EPIPHANY_HOME}/tools/host/lib 13 | LIBS+=-le-hal -lrt 14 | ifneq (,$(wildcard ${EPIPHANY_HOME}/tools/host/lib/libe-loader.so)) 15 | LIBS+=-le-loader 16 | endif 17 | endif 18 | 19 | YFLAGS := -d 20 | LFLAGS := 21 | 22 | epython: $(OBJECTS) 23 | $(CC) $(LDFLAGS) -o epython-host $(OBJECTS) $(LIBS) 24 | 25 | full: lexer parser epython 26 | 27 | .PHONE: check 28 | 29 | %.o : %.c 30 | $(CC) $(CFLAGS) -MMD -o $@ -c $< 31 | 32 | lexer: 33 | $(LEX) $(LFLAGS) -o lexer.c epython.l 34 | 35 | parser: 36 | $(YACC) $(YFLAGS) -o parser.c epython.y 37 | 38 | clean: 39 | $(RM) -f -v $(bins) *.yy.[ch] *.tab.[ch] *.o *.d *.output 40 | 41 | -include *.d 42 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Nick Brown 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /host/stack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * stack.h 3 | * 4 | * Created on: 9 July 2015 5 | * Author: Nick Brown 6 | */ 7 | 8 | #ifndef HOST_STACK_H_ 9 | #define HOST_STACK_H_ 10 | 11 | #define INITIAL_STACK_SIZE 10 12 | 13 | struct stack_t { 14 | int size, width; 15 | char * type; 16 | void **data; 17 | }; 18 | 19 | struct identifier_exp { 20 | char * identifier; 21 | struct memorycontainer* exp; 22 | }; 23 | 24 | #include "byteassembler.h" 25 | 26 | struct stack_t* getNewStack(void); 27 | void clearStack(struct stack_t*); 28 | void initStack(struct stack_t*); 29 | int getStackSize(struct stack_t*); 30 | int pop(struct stack_t*); 31 | void push(struct stack_t*, int); 32 | char* popIdentifier(struct stack_t*); 33 | void pushIdentifier(struct stack_t*, char*); 34 | int peek(struct stack_t*); 35 | int getTopType(struct stack_t*); 36 | struct memorycontainer* popExpression(struct stack_t*); 37 | void pushExpression(struct stack_t*, struct memorycontainer*); 38 | void pushIdentifierAssgnExpression(struct stack_t*, char*, struct memorycontainer*); 39 | struct identifier_exp* popExpressionIdentifier(struct stack_t*); 40 | struct memorycontainer* getExpressionAt(struct stack_t*, int); 41 | struct identifier_exp* getExpressionIdentifierAt(struct stack_t*, int); 42 | char* getIdentifierAt(struct stack_t*, int); 43 | int getTypeAt(struct stack_t*, int); 44 | 45 | #endif /* HOST_STACK_H_ */ 46 | -------------------------------------------------------------------------------- /examples/jacobi_offload.py: -------------------------------------------------------------------------------- 1 | from epython import * 2 | import array 3 | import math 4 | 5 | data=None 6 | data_p1=None 7 | define_on_device(data) 8 | define_on_device(data_p1) 9 | 10 | MAX_ITS=10000 11 | 12 | @offload 13 | def initialise(global_size): 14 | 
num_local=global_size/(numcores()-1) 15 | if num_local * (numcores()-1) != global_size: 16 | if (coreid() < global_size-num_local*(numcores()-1)): num_local+=1 17 | data=[0.0]*(num_local+2) 18 | data_p1=[0.0]*(num_local+2) 19 | if coreid()==0: data[0]=1.0 20 | if coreid()==numcores()-2: data[num_local+1]=10.0 21 | 22 | @offload 23 | def calc_residual(): 24 | tmpnorm=0.0 25 | i=1 26 | while i<=len(data)-2: 27 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 28 | i+=1 29 | return tmpnorm 30 | 31 | @offload 32 | def jacobi_iteration(): 33 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 34 | if (coreid() < numcores()-2): data[len(data)-1]=sendrecv(data[len(data)-2], coreid()+1) 35 | i=1 36 | while i<=len(data)-2: 37 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 38 | i+=1 39 | # Swap data around for next iteration 40 | i=1 41 | while i<=len(data)-2: 42 | data[i]=data_p1[i] 43 | i+=1 44 | 45 | initialise(100) 46 | a=calc_residual() 47 | bnorm=math.sqrt(sum(a)) 48 | 49 | norm=1.0 50 | it=0 51 | 52 | while norm > 1e-4: 53 | jacobi_iteration() 54 | rn=calc_residual() 55 | norm=math.sqrt(sum(rn))/bnorm 56 | it+=1 57 | if it%50 == 0 : print "Rnorm is "+str(norm)+" after "+str(it)+" iterations" 58 | -------------------------------------------------------------------------------- /examples/pipeline.py: -------------------------------------------------------------------------------- 1 | from parallel import * 2 | from util import * 3 | from random import randrange 4 | from array import len 5 | 6 | sorting_size=100 7 | data=[0]*sorting_size 8 | 9 | if (coreid()==0): 10 | pipelineStageOne(10) 11 | elif (coreid()==1): 12 | pipelineStageTwo() 13 | elif (coreid()==2): 14 | pipelineStageThree() 15 | elif (coreid()==3): 16 | pipelineStageFour() 17 | 18 | def pipelineStageOne(num_items): 19 | for i in range(num_items): 20 | num=randrange(sorting_size-5) + 5 21 | send(num, coreid()+1) 22 | send(-1,coreid()+1) 23 | 24 | def pipelineStageTwo(): 25 | num=0 26 | while num >= 0: 27 
| num=recv(coreid()-1) 28 | if num > 0: 29 | i=0 30 | while i < num: 31 | data[i]=randrange(1000) 32 | i+=1 33 | send(num, coreid()+1) 34 | if num > 0: send(data, coreid()+1, num) 35 | 36 | def pipelineStageThree(): 37 | num=0 38 | while num >=0: 39 | num=recv(coreid()-1) 40 | if num > 0: 41 | data=recv(coreid()-1, num) 42 | oddSort(data) 43 | send(num, coreid()+1) 44 | if num > 0: send(data, coreid()+1, num) 45 | 46 | def pipelineStageFour(): 47 | num=0 48 | num_contig=0.0 49 | total_num=0 50 | while num >=0: 51 | num=recv(coreid()-1) 52 | if num > 0: 53 | total_num+=num 54 | data=recv(coreid()-1, num) 55 | cnum=data[0] 56 | ccount=1 57 | i=0 58 | while i < num: 59 | if (data[i] == cnum): 60 | ccount+=1 61 | else: 62 | num_contig+=ccount 63 | cnum=data[i] 64 | ccount=0 65 | i+=1 66 | chance=(num_contig/total_num)*100 67 | print chance+"% of numbers were contiguous" 68 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | prefix ?= /usr 2 | bindir = $(prefix)/bin 3 | includedir = $(prefix)/include 4 | 5 | all: epiphany 6 | 7 | standalone: clean 8 | @cd host; $(MAKE) epython STANDALONE=1 9 | @mv host/epython-host . 10 | 11 | standalone-full: clean 12 | @cd host; $(MAKE) full STANDALONE=1 13 | @mv host/epython-host . 14 | 15 | epiphany: clean host-build device-build 16 | 17 | full: clean host-full device-build 18 | 19 | host-build: 20 | @cd host; $(MAKE) epython 21 | @mv host/epython-host . 22 | 23 | host-full: 24 | @cd host; $(MAKE) full 25 | @mv host/epython-host . 26 | 27 | device-build: 28 | @cd device; $(MAKE) 29 | @mv device/epython-device.srec . 30 | @mv device/epython-device.elf . 
31 | 32 | clean: 33 | @cd interpreter; rm -f *.o *.d 34 | @cd host; $(MAKE) clean 35 | @cd device; $(MAKE) clean 36 | 37 | install: 38 | @mkdir -p $(DESTDIR)$(bindir) 39 | @cp epython-host epython-device.srec epython-device.elf $(DESTDIR)$(bindir) 40 | @cp epython.sh $(DESTDIR)$(bindir)/epython 41 | @mkdir -p $(DESTDIR)$(includedir)/epython 42 | @cp -R modules $(DESTDIR)$(includedir)/epython/. 43 | @echo 'export EPYTHONPATH=$$EPYTHONPATH:$(includedir)/epython/modules:$(shell pwd)' >> ~/.bashrc 44 | @echo 'export PYTHONPATH=$$PYTHONPATH:$(includedir)/epython/modules/fullpython' >> ~/.bashrc 45 | @echo "ePython installed, start a new bash session by executing bash before running ePython" 46 | 47 | uninstall: 48 | @rm $(DESTDIR)$(bindir)/epython-host 49 | @rm $(DESTDIR)$(bindir)/epython-device.srec 50 | @rm $(DESTDIR)$(bindir)/epython-device.elf 51 | @rm $(DESTDIR)$(bindir)/epython 52 | @rm $(DESTDIR)$(includedir)/epython/modules/*.py 53 | -------------------------------------------------------------------------------- /examples/parallel-odd-even-sort.py: -------------------------------------------------------------------------------- 1 | from util import * 2 | from parallel import * 3 | from random import randrange 4 | from array import len, array 5 | 6 | ln=25 7 | N=ln * numcores() 8 | x=[0]*ln 9 | other=[0]*ln 10 | i=0 11 | while i < ln: 12 | x[i]=randrange(1000) 13 | i+=1 14 | 15 | k=0 16 | while k <= numcores()-1: 17 | oddSort(x) 18 | partner=0 19 | if (k%2 == 0): 20 | if (coreid() % 2 == 0): 21 | partner=coreid()+1 22 | else: 23 | partner=coreid()-1 24 | else: 25 | if (coreid()%2 == 0): 26 | partner=coreid()-1 27 | else: 28 | partner=coreid()+1 29 | if (partner >= 0 and partner < numcores()): 30 | other=sendrecv(x, partner, ln) 31 | if coreid() < partner: 32 | swap_values(other, x) 33 | else: 34 | swap_values(x, other) 35 | k+=1 36 | 37 | for j in range(numcores()-1): 38 | if (j==coreid()): 39 | for i in range(ln-1): 40 | print x[i] 41 | sync() 42 | 43 | def 
swap_values(a, b): 44 | searching=true 45 | while searching: 46 | searching=false 47 | min_index=get_min_index(a) 48 | max_index=get_max_index(b) 49 | if (a[min_index] < b[max_index]): 50 | temp=a[min_index] 51 | a[min_index]=b[max_index] 52 | b[max_index]=temp 53 | searching=true 54 | 55 | def get_min_index(a): 56 | v=0 57 | j=-1 58 | i=0 59 | while i <= len(a)-1: 60 | if (j == -1 or v > a[i]): 61 | v=a[i] 62 | j=i 63 | i+=1 64 | return j 65 | 66 | def get_max_index(a): 67 | v=0 68 | j=-1 69 | i=0 70 | while i <= len(a)-1: 71 | if (j == -1 or v < a[i]): 72 | v=a[i] 73 | j=i 74 | i+=1 75 | return j -------------------------------------------------------------------------------- /modules/parallel.py: -------------------------------------------------------------------------------- 1 | def send(data, pid, n=none): 2 | if (n is none): 3 | native rtl_send(data,pid) 4 | else: 5 | i=0 6 | while i 13: matchingpid=1 26 | for i in range(1,13,3): 27 | send(-1,i) 28 | 29 | def pipelineStageTwo(): 30 | num=0 31 | while num >= 0: 32 | num=recv(0) 33 | if num > 0: 34 | i=0 35 | while i < num: 36 | data[i]=randrange(1000) 37 | i+=1 38 | send(num, coreid()+1) 39 | if num > 0: send(data, coreid()+1, num) 40 | 41 | def pipelineStageThree(): 42 | num=0 43 | while num >=0: 44 | num=recv(coreid()-1) 45 | if num > 0: 46 | data=recv(coreid()-1, num) 47 | oddSort(data, num) 48 | send(num, coreid()+1) 49 | if num > 0: send(data, coreid()+1, num) 50 | 51 | def pipelineStageFour(): 52 | num=0 53 | num_contig=0.0 54 | total_num=0 55 | while num >=0: 56 | num=recv(coreid()-1) 57 | if num > 0: 58 | total_num+=num 59 | data=recv(coreid()-1, num) 60 | cnum=data[0] 61 | ccount=1 62 | i=0 63 | while i < num: 64 | if (data[i] == cnum): 65 | ccount+=1 66 | else: 67 | num_contig+=ccount 68 | cnum=data[i] 69 | ccount=0 70 | i+=1 71 | chance=(num_contig/total_num)*100 72 | print chance+"% of numbers were contiguous" 73 | -------------------------------------------------------------------------------- 
/host/python_interoperability.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef PYTHONINTEROPERABILITY_H_ 28 | #define PYTHONINTEROPERABILITY_H_ 29 | 30 | #include "../shared.h" 31 | #include "configuration.h" 32 | #include 33 | 34 | void runFullPythonInteractivityOnHost(struct interpreterconfiguration*, struct shared_basic*, pthread_t*, char); 35 | 36 | #endif /* CONFIGURATION_H_ */ 37 | -------------------------------------------------------------------------------- /device/main.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef MAIN_H_ 28 | #define MAIN_H_ 29 | #include 30 | #include "shared.h" 31 | 32 | extern volatile e_barrier_t syncbarriers[TOTAL_CORES], collectivebarriers[TOTAL_CORES]; 33 | extern e_barrier_t *sync_tgt_bars[TOTAL_CORES], *collective_tgt_bars[TOTAL_CORES]; 34 | extern volatile struct shared_basic * sharedData; 35 | extern volatile unsigned char syncValues[TOTAL_CORES]; 36 | extern int myId, lowestCoreId; 37 | 38 | #endif /* MAIN_H_ */ 39 | -------------------------------------------------------------------------------- /host/misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef MISC_H_ 28 | #define MISC_H_ 29 | 30 | #define SQRT_MATHS_OP 0 31 | #define SIN_MATHS_OP 1 32 | #define COS_MATHS_OP 2 33 | #define TAN_MATHS_OP 3 34 | #define ASIN_MATHS_OP 4 35 | #define ACOS_MATHS_OP 5 36 | #define ATAN_MATHS_OP 6 37 | #define SINH_MATHS_OP 7 38 | #define COSH_MATHS_OP 8 39 | #define TANH_MATHS_OP 9 40 | #define FLOOR_MATHS_OP 10 41 | #define CEIL_MATHS_OP 11 42 | #define LOG_MATHS_OP 12 43 | #define LOG10_MATHS_OP 13 44 | #define RANDOM_MATHS_OP 14 45 | 46 | void errorCheck(int, char*); 47 | char* translateErrorCodeToMessage(unsigned char); 48 | 49 | #endif /* MISC_H_ */ 50 | -------------------------------------------------------------------------------- /host/configuration.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution.
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef CONFIGURATION_H_ 28 | #define CONFIGURATION_H_ 29 | 30 | #define VERSION_IDENT "2.0" 31 | 32 | // Configuration structure which is filled based upon command line arguments 33 | struct interpreterconfiguration { 34 | char * intentActive; 35 | char displayStats, displayTiming, forceCodeOnCore, forceCodeOnShared, forceDataOnShared, displayPPCode; 36 | char * filename, *compiledByteFilename, *loadByteFilename, *pipedInContents; 37 | int hostProcs, coreProcs, loadElf, loadSrec, fullPythonHost; 38 | }; 39 | 40 | struct interpreterconfiguration* readConfiguration(int, char *[]); 41 | 42 | #endif /* CONFIGURATION_H_ */ 43 | -------------------------------------------------------------------------------- /docs/installupgrade.md: -------------------------------------------------------------------------------- 1 | # Installing ePython 2 | 3 | To install ePython you need to checkout a version from the repository, build it and then install it. 
Firstly, log into your Parallella and issue 4 | 5 | ``` 6 | git clone https://github.com/mesham/epython.git 7 | ``` 8 | 9 | copy ePython onto your machine and a directory called *epython* will have been created. Next we are going to build this by issuing the make command, from the top level directory (the one you issued the git clone into) issue the following two commands: 10 | 11 | ``` 12 | cd epython 13 | make 14 | ``` 15 | 16 | The build process takes around 20-30 seconds and once complete you will be returned to the bash prompt. The next (and last) step is to install ePython to a central location by issuing the two commands below. You will be prompted for your user's password with the first command. The second command (bash) starts a new bash terminal which has the python paths correctly set and every time bash starts from now on it will be correctly configured for ePython. 17 | 18 | ``` 19 | sudo make install 20 | bash 21 | ``` 22 | 23 | Congratulations! You have installed ePython and are ready to start programming! 24 | 25 | # Upgrading ePython 26 | 27 | ePython is actively being developed, so new features and bug fixes are being added to the code. Because of this, and to ensure you can run the latest examples, it is useful to periodically ensure you have the latest ePython and if not upgrade your version. The process is similar to the installation process, ensure you are in the *epython* directory (you might need to issue *cd epython*) and then issue: 28 | 29 | ``` 30 | git pull 31 | ``` 32 | 33 | This will contact the ePython repository and download any updates. If you see the message *Already up-to-date.* then you already have the latest version and can stop here, if not then you have downloaded some updates and we need to build and install these via: 34 | 35 | ``` 36 | make 37 | sudo make install 38 | ``` 39 | 40 | This will replace the older version of ePython with this latest, newer version.
It is just the ePython interpreter that is being updated, so don't worry - all your own Python codes will remain unchanged and untouched by this. 41 | -------------------------------------------------------------------------------- /examples/jacobi.py: -------------------------------------------------------------------------------- 1 | /* 2 | Jacobi iteration to solve Laplace's equation for diffusion in one dimension 3 | This illustrates distributing data amongst the cores, halo swapping and reductions 4 | */ 5 | 6 | from parallel import * 7 | from math import sqrt 8 | 9 | DATA_SIZE=100 10 | MAX_ITS=10000 11 | 12 | # Work out the amount of data to hold on this core 13 | local_size=DATA_SIZE/numcores() 14 | if local_size * numcores() != DATA_SIZE: 15 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 16 | 17 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 18 | data=[0] * (local_size+2) 19 | data_p1=[0]* (local_size+2) 20 | 21 | # Set the initial conditions 22 | i=0 23 | while i<=local_size+1: 24 | data[i]=0.0 25 | i+=1 26 | 27 | if coreid()==0: data[0]=1.0 28 | if coreid()==numcores()-1: data[local_size+1]=10.0 29 | 30 | # Compute the initial absolute residual 31 | tmpnorm=0.0 32 | i=1 33 | while i<=local_size: 34 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 35 | i+=1 36 | tmpnorm=reduce(tmpnorm, "sum") 37 | bnorm=sqrt(tmpnorm) 38 | 39 | norm=1.0 40 | its=0 41 | while norm >= 1e-4 and its < MAX_ITS: 42 | # Halo swap to my left and right neighbours if I have them 43 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 44 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], coreid()+1) 45 | 46 | # Calculate current residual 47 | tmpnorm=0.0 48 | i=1 49 | while i<=local_size: 50 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 51 | i+=1 52 | tmpnorm=reduce(tmpnorm, "sum") 53 | norm=sqrt(tmpnorm)/bnorm 54 | 55 | if coreid()==0 and its%1000 == 0: print 
"RNorm is "+norm+" at "+its+" iterations" 56 | 57 | # Performs the Jacobi iteration for Laplace 58 | i=1 59 | while i<=local_size: 60 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 61 | i+=1 62 | # Swap data around for next iteration 63 | i=1 64 | while i<=local_size: 65 | data[i]=data_p1[i] 66 | i+=1 67 | its+=1 68 | 69 | if coreid()==0: print "Completed in "+str(its)+" iterations, RNorm="+str(norm) -------------------------------------------------------------------------------- /examples/gauss-seidel.py: -------------------------------------------------------------------------------- 1 | /* 2 | Red-black Gauss Seidel with SOR to solve Laplace's equation for diffusion in one dimension 3 | This illustrates distributing data amongst the cores, halo swapping and reductions 4 | */ 5 | 6 | from parallel import * 7 | from math import sqrt 8 | 9 | DATA_SIZE=100 10 | MAX_ITS=10000 11 | W=1.3 # Overrelaxing factor (between 1 and 2) 12 | 13 | # Work out the amount of data to hold on this core 14 | local_size=DATA_SIZE/numcores() 15 | if local_size * numcores() != DATA_SIZE: 16 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 17 | 18 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 19 | data=[0]*(local_size+2) 20 | 21 | # Set the initial conditions 22 | i=0 23 | while i<=local_size+1: 24 | data[i]=0.0 25 | i+=1 26 | 27 | if coreid()==0: data[0]=1.0 28 | if coreid()==numcores()-1: data[local_size+1]=10.0 29 | 30 | # Compute the initial absolute residual 31 | tmpnorm=0.0 32 | i=1 33 | while i<=local_size: 34 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 35 | i+=1 36 | tmpnorm=reduce(tmpnorm, "sum") 37 | bnorm=sqrt(tmpnorm) 38 | norm=1.0 39 | its=0 40 | while norm >= 1e-4 and its < MAX_ITS: 41 | # Halo swap to my left and right neighbours if I have them 42 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 43 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], 
coreid()+1) 44 | 45 | # Calculate current residual 46 | tmpnorm=0.0 47 | i=1 48 | while i<=local_size: 49 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 50 | i+=1 51 | tmpnorm=reduce(tmpnorm, "sum") 52 | norm=sqrt(tmpnorm)/bnorm 53 | if coreid()==0 and its%1000 == 0: print "RNorm is "+str(norm)+" at "+str(its)+" iterations" 54 | j=0 55 | while j<2: 56 | if (j==1): 57 | i=1 58 | else: 59 | i=2 60 | while i<=local_size: 61 | data[i]=((1-W) * data[i]) + 0.5 * W * (data[i-1]+data[i+1]) 62 | i+=2 63 | j+=1 64 | its+=1 65 | 66 | if coreid()==0: print "Completed in "+str(its)+" iterations, RNorm="+str(norm) -------------------------------------------------------------------------------- /modules/array.py: -------------------------------------------------------------------------------- 1 | def array(a,b=none,c=none,d=none,e=none,f=none,g=none): 2 | if (b is none): 3 | return native rtl_allocatearray(a) 4 | elif (c is none): 5 | return native rtl_allocatearray(a,b) 6 | elif (d is none): 7 | return native rtl_allocatearray(a,b,c) 8 | elif (e is none): 9 | return native rtl_allocatearray(a,b,c,d) 10 | elif (f is none): 11 | return native rtl_allocatearray(a,b,c,d,e) 12 | elif (g is none): 13 | return native rtl_allocatearray(a,b,c,d,e,f) 14 | else: 15 | return native rtl_allocatearray(a,b,c,d,e,f,g) 16 | 17 | def shared_mem_array(a,b=none,c=none,d=none,e=none,f=none,g=none): 18 | if (b is none): 19 | return native rtl_allocatesharedarray(a) 20 | elif (c is none): 21 | return native rtl_allocatesharedarray(a,b) 22 | elif (d is none): 23 | return native rtl_allocatesharedarray(a,b,c) 24 | elif (e is none): 25 | return native rtl_allocatesharedarray(a,b,c,d) 26 | elif (f is none): 27 | return native rtl_allocatesharedarray(a,b,c,d,e) 28 | elif (g is none): 29 | return native rtl_allocatesharedarray(a,b,c,d,e,f) 30 | else: 31 | return native rtl_allocatesharedarray(a,b,c,d,e,f,g) 32 | 33 | def flatten(arr): 34 | native rtl_flatten(arr, size(arr)) 35 | return arr 36 | 37 | 
def arraycopy(target, source): 38 | if (len(target) != len(source)): 39 | print "Error, array copy overall sizes must match" 40 | exit() 41 | else: 42 | native rtl_arraycopy(target, source, ndim(target), ndim(source), len(target)) 43 | 44 | def size(arr): 45 | dims=ndim(arr) 46 | if dims > 0: 47 | s=shape(arr) 48 | arraylength=1 49 | i=0 50 | while i 0): 58 | send(arg, pid, datalen) 59 | else: 60 | send(arg, pid) 61 | 62 | def worker(): 63 | num_args=recv(_masterTask) 64 | while num_args >= 0: 65 | op=recv(_masterTask) 66 | retVal=none 67 | if (num_args == 0): retVal=op() 68 | if (num_args == 1): retVal=op(recvArgument(_masterTask)) 69 | if (num_args == 2): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask)) 70 | if (num_args == 3): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 71 | if (num_args == 4): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 72 | if (num_args == 5): retVal=op(recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask), recvArgument(_masterTask)) 73 | if (retVal is none): 74 | send(-1, _masterTask) 75 | else: 76 | datalen=len(retVal) 77 | send(datalen, _masterTask) 78 | if (datalen==0): 79 | send(retVal, _masterTask) 80 | else: 81 | send(retVal, _masterTask, datalen) 82 | num_args=recv(_masterTask) 83 | 84 | def recvArgument(pid): 85 | length=recv(pid) 86 | if (length==0): 87 | return recv(pid) 88 | else: 89 | return recv(pid, length) 90 | -------------------------------------------------------------------------------- /examples/mergesort.py: -------------------------------------------------------------------------------- 1 | /* 2 | Parallel mergesort using divide and conquer. 
An unsorted random list of numbers is generated on core 0, then each core will split the data until 3 | there are no more cores left, then each will sequentially solve its base case using a bubblesort. The sorted results on each core are then merged 4 | back together and core 0 will display the sorted list. Whilst the sequential (bubblesort) algorithm is inefficient, this illustrates the general 5 | concept and could be swapped out for something better such as quicksort if desired. 6 | */ 7 | 8 | from util import * 9 | from parallel import * 10 | from random import randrange 11 | from array import array 12 | 13 | na=128 14 | data=array(na) 15 | 16 | if coreid()==0: 17 | populateData(data, na) 18 | sort(data, 0, na) 19 | displayData(data, na) 20 | else: 21 | level=getlevel() 22 | pid=getparentId(level) 23 | dsize=na/(2^level) 24 | d=recv(pid, dsize) 25 | sort(d, level, dsize) 26 | send(d, coreid() - (2^(4-level)), dsize) 27 | 28 | def sort(d, level, thissize): 29 | if level == 4: 30 | bubblesort(d, thissize) 31 | else: 32 | pivot=thissize/2 33 | cid=coreid() + (2^(3-level)) 34 | send(d, cid, pivot) 35 | split=array(thissize-pivot) 36 | for x in range(pivot, thissize-1): 37 | split[x-pivot]=d[x] 38 | sort(split,level+1, thissize-pivot) 39 | ssplit=recv(cid, pivot) 40 | merge(d, split, ssplit, pivot, thissize) 41 | 42 | def merge(target, split, ssplit, pivot, length): 43 | i=0 44 | pre_index=0 45 | post_index=0 46 | while i= pivot: 48 | target[i]=ssplit[post_index] 49 | post_index=post_index+1 50 | elif post_index >=length-pivot: 51 | target[i]=split[pre_index] 52 | pre_index=pre_index+1 53 | elif split[pre_index] < ssplit[post_index]: 54 | target[i]=split[pre_index] 55 | pre_index=pre_index+1 56 | else: 57 | target[i]=ssplit[post_index] 58 | post_index=post_index+1 59 | i=i+1 60 | 61 | def bubblesort(d,size): 62 | i=0 63 | while i d[i]: 67 | temp=d[i] 68 | d[i]=d[j] 69 | d[j]=temp 70 | j=j+1 71 | i=i+1 72 | 73 | 74 | def populateData(d, l): 75 | i=0 76 | while i 1: 
90 | if isdivbyn(coreid(), cc): return i 91 | cc=cc/2 92 | i=i+1 93 | return i 94 | 95 | def getparentId(level): 96 | cc=numcores() 97 | for x in range(1,level): 98 | cc=cc/2 99 | return coreid()-cc 100 | 101 | def isdivbyn(a,b): 102 | if (a/b)*b==a: 103 | return true 104 | else: 105 | return false 106 | 107 | -------------------------------------------------------------------------------- /interpreter/functions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef FUNCTIONS_H_ 28 | #define FUNCTIONS_H_ 29 | 30 | #include "interpreter.h" 31 | 32 | /* 33 | * These functions are implemented by the device and host to support running in normal parallel core 34 | * Epiphany mode, and also standalone host mode only which is useful for interpreter development/testing 35 | */ 36 | 37 | #ifdef HOST_INTERPRETER 38 | void callNativeFunction(struct value_defn*, unsigned char, int, struct value_defn*,int,int,int,struct symbol_node*,int); 39 | char* getHeapMemory(int,char,int); 40 | void freeMemoryInHeap(void*,int); 41 | void syncCores(int, int); 42 | struct value_defn performStringConcatenation(struct value_defn, struct value_defn, int); 43 | #else 44 | void callNativeFunction(struct value_defn*, unsigned char, int, struct value_defn*, int, int, int, struct symbol_node*); 45 | char* getHeapMemory(int,char,int,struct symbol_node*); 46 | void freeMemoryInHeap(void*); 47 | void syncCores(int); 48 | struct value_defn performStringConcatenation(struct value_defn, struct value_defn, int, struct symbol_node*); 49 | #endif 50 | int checkStringEquality(struct value_defn, struct value_defn); 51 | struct symbol_node* initialiseSymbolTable(int); 52 | char* getStackMemory(int,char); 53 | void clearFreedStackFrames(char*); 54 | void cpy(volatile void*, volatile void *, unsigned int); 55 | void raiseError(unsigned char); 56 | int slength(char*); 57 | int getInt(void*); 58 | float getFloat(void*); 59 | 60 | #endif /* FUNCTIONS_H_ */ 61 | -------------------------------------------------------------------------------- /host/memorymanager.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef MEMORYMANAGER_H_ 28 | #define MEMORYMANAGER_H_ 29 | 30 | #include "byteassembler.h" 31 | 32 | // Used to maintain a linked list of functions 33 | struct functionListNode { 34 | struct functionDefinition * fn; 35 | struct functionListNode * next; 36 | }; 37 | 38 | struct exportableFunctionTableNode { 39 | char * functionName; 40 | unsigned short functionLocation; 41 | struct exportableFunctionTableNode * next; 42 | }; 43 | 44 | extern struct exportableFunctionTableNode* exportableFunctionTable; 45 | extern int numberExportableFunctionsInTable; 46 | 47 | int getNumberOfSymbolEntriesNotUsed(void); 48 | void addFunction(struct functionDefinition*); 49 | int getNumberSymbolTableEntriesForRecursion(void); 50 | void compileMemory(struct memorycontainer*); 51 | struct memorycontainer* concatenateMemory(struct memorycontainer*, struct memorycontainer*); 52 | struct memorycontainer* cloneMemory(struct memorycontainer*); 53 | unsigned int appendStatement(struct memorycontainer*, unsigned char, unsigned int); 54 | unsigned int appendMemory(struct memorycontainer*, struct memorycontainer*, unsigned int); 55 | unsigned int appendVariable(struct memorycontainer*, unsigned short, unsigned int); 56 | unsigned int getMemoryFilledSize(void); 57 | void setMemoryFilledSize(unsigned int); 58 | char * getAssembledCode(void); 59 | void setAssembledCode(char*); 60 | 61 | extern struct function_call_tree_node mainCodeCallTree; 62 | 63 | #endif /* MEMORYMANAGER_H_ */ 64 | -------------------------------------------------------------------------------- /interpreter/interpreter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef INTERPRETER_H_ 28 | #define INTERPRETER_H_ 29 | 30 | #include "../shared.h" 31 | 32 | #define INT_TYPE 0 33 | #define REAL_TYPE 1 34 | #define STRING_TYPE 2 35 | #define BOOLEAN_TYPE 3 36 | #define NONE_TYPE 4 37 | #define FN_ADDR_TYPE 5 38 | 39 | #define INT_PTR_TYPE 100 40 | #define REAL_PTR_TYPE 101 41 | #define STRING_PTR_TYPE 102 42 | #define BOOLEAN_PTR_TYPE 103 43 | #define NONE_PTR_TYPE 104 44 | #define FN_ADDR_PTR_TYPE 105 45 | 46 | #define SCALAR 0 47 | #define ARRAY 1 48 | 49 | #define UNALLOCATED 1 50 | #define ALLOCATED 2 51 | #define ALIAS 3 52 | 53 | // The value in a symbol table; its type and data (which is integer/real or pointer to string 54 | // or array.) In host mode this is 8 bytes as often pointers are 64bit, but on Epiphany only 4 byte as 32 bit pointers 55 | struct value_defn { 56 | char type, dtype; 57 | #ifdef HOST_STANDALONE 58 | char data[8]; 59 | #else 60 | char data[4]; 61 | #endif 62 | }; 63 | 64 | // A node in the symbol table - its id and value 65 | struct symbol_node { 66 | unsigned short id, alias; 67 | unsigned char state, level; 68 | struct value_defn value __attribute__((aligned(8))); 69 | }; 70 | 71 | #ifdef HOST_INTERPRETER 72 | extern volatile char * stopInterpreter; 73 | void runIntepreter(char*, unsigned int, unsigned short, int, int, int); 74 | void initThreadedAspectsForInterpreter(int, int, struct shared_basic*); 75 | #else 76 | extern char stopInterpreter; 77 | void runIntepreter(char*, unsigned int, unsigned short, int, int, int); 78 | #endif 79 | #endif /* INTERPRETER_H_ */ 80 | -------------------------------------------------------------------------------- /examples/parallel_pipeline.py: -------------------------------------------------------------------------------- 1 | from parallel import * 2 | from util import * 3 | from random import randrange 4 | from array import len, array 5 | 6 | sorting_size=100 7 | data=[0]*sorting_size 8 | 9 | if (coreid()==0): 10 | pipelineStageOne(10) 11 | 
elif (coreid()==1): 12 | pipelineStageTwo() 13 | elif (coreid() >= 2 and coreid() <= 14): 14 | pipelineStageThree() 15 | elif (coreid()==15): 16 | pipelineStageFour() 17 | 18 | def pipelineStageOne(num_items): 19 | for i in range(num_items): 20 | num=randrange(sorting_size-5) + 5 21 | num+=num % 13 22 | send(num, coreid()+1) 23 | send(-1,coreid()+1) 24 | 25 | def pipelineStageTwo(): 26 | num=0 27 | while num >= 0: 28 | num=recv(coreid()-1) 29 | j=2 30 | while j<=14: 31 | if num > 0: 32 | i=0 33 | while i < num/13: 34 | data[i]=randrange(1000) 35 | i+=1 36 | send(num/13, j) 37 | send(data, j, num/13) 38 | else: 39 | send(-1, j) 40 | j+=1 41 | 42 | def pipelineStageThree(): 43 | num=0 44 | while num >=0: 45 | num=recv(1) 46 | if num > 0: 47 | data=recv(1, num) 48 | parallel_odd_even_sort(num) 49 | send(num, 15) 50 | if num > 0: send(data, 15, num) 51 | 52 | def pipelineStageFour(): 53 | rdata=array(100) 54 | num=0 55 | num_contig=0.0 56 | total_num=0 57 | while num >=0: 58 | i=2 59 | while i<=14: 60 | num=recv(i) 61 | if (num > 0): 62 | rdata=recv(i, num) 63 | j=num*i 64 | while j 0: 69 | num*=13 70 | total_num+=num 71 | cnum=data[0] 72 | ccount=0 73 | i=0 74 | while i < num: 75 | if (data[i] == cnum): 76 | ccount+=1 77 | else: 78 | num_contig+=ccount 79 | cnum=data[i] 80 | ccount=0 81 | i+=1 82 | chance=(num_contig/total_num)*100 83 | print chance+"% of numbers were contiguous" 84 | 85 | def parallel_odd_even_sort(ln): 86 | other=array(ln) 87 | i=2 88 | while i <= 14: 89 | oddSort(data,ln) 90 | partner=0 91 | if (i%2 == 0): 92 | if (coreid() % 2 == 0): 93 | partner=coreid()+1 94 | else: 95 | partner=coreid()-1 96 | else: 97 | if (coreid()%2 == 0): 98 | partner=coreid()-1 99 | else: 100 | partner=coreid()+1 101 | if (partner >= 2 and partner <= 14): 102 | other=sendrecv(data, partner, ln) 103 | if coreid() < partner: 104 | swap_values(other, data) 105 | else: 106 | swap_values(data, other) 107 | i+=1 108 | 109 | def swap_values(a, b): 110 | searching=true 111 | while 
searching: 112 | searching=false 113 | min_index=get_min_index(a) 114 | max_index=get_max_index(b) 115 | if (a[min_index] < b[max_index]): 116 | temp=a[min_index] 117 | a[min_index]=b[max_index] 118 | b[max_index]=temp 119 | searching=true 120 | 121 | def get_min_index(a): 122 | v=0 123 | j=-1 124 | i=0 125 | while i < len(a): 126 | if (j == -1 or v > a[i]): 127 | v=a[i] 128 | j=i 129 | i+=1 130 | return j 131 | 132 | def get_max_index(a): 133 | v=0 134 | j=-1 135 | i=0 136 | while i < len(a): 137 | if (j == -1 or v < a[i]): 138 | v=a[i] 139 | j=i 140 | i+=1 141 | return j 142 | -------------------------------------------------------------------------------- /device/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include "shared.h" 29 | #include "interpreter.h" 30 | #include "main.h" 31 | #include "functions.h" 32 | 33 | volatile e_barrier_t syncbarriers[TOTAL_CORES], collectivebarriers[TOTAL_CORES]; 34 | e_barrier_t *sync_tgt_bars[TOTAL_CORES], *collective_tgt_bars[TOTAL_CORES]; 35 | volatile unsigned char syncValues[TOTAL_CORES]; 36 | volatile struct shared_basic * sharedData; 37 | int myId, lowestCoreId; 38 | 39 | static void init_barrier(volatile e_barrier_t[], e_barrier_t *[]); 40 | 41 | /** 42 | * Core entry point, sets the stuff up and then runs the interpreter 43 | */ 44 | int main() { 45 | myId=e_group_config.core_row * e_group_config.group_cols + e_group_config.core_col; 46 | sharedData=(void*) (e_emem_config.base + EXTERNAL_MEM_ABSOLUTE_START); 47 | 48 | while (sharedData->core_ctrl[myId].core_run == 0) {}; 49 | sharedData->core_ctrl[myId].core_busy=1; 50 | sharedData->core_ctrl[myId].core_run=1; 51 | 52 | int i; 53 | lowestCoreId=TOTAL_CORES; 54 | for (i=0;icore_ctrl[i].active) { 57 | if (i< lowestCoreId) lowestCoreId=i; 58 | } 59 | } 60 | 61 | init_barrier(syncbarriers, sync_tgt_bars); 62 | init_barrier(collectivebarriers, collective_tgt_bars); 63 | 64 | if (sharedData->codeOnCores) { 65 | cpy(sharedData->edata, sharedData->esdata, sharedData->length); 66 | } 67 | 68 | syncCores(0); 69 | runIntepreter(sharedData->edata, sharedData->length, sharedData->symbol_size, myId, 
sharedData->num_procs, sharedData->baseHostPid); 70 | sharedData->core_ctrl[myId].core_busy=0; 71 | sharedData->core_ctrl[myId].core_run=0; 72 | return 0; 73 | } 74 | 75 | /** 76 | * Initialises an Epiphany barrier, this is based upon the version in elib, but works when core 0 is not in use 77 | * and over a subset of cores 78 | */ 79 | static void init_barrier(volatile e_barrier_t barrier_array[], e_barrier_t * target_barrier_array[]) { 80 | int i, row, col; 81 | for (i=0; iNick Brown and is [licenced](LICENCE) under BSD-2. 5 | 6 | ## Installation 7 | ePython comes pre-installed with the latest Parallella Linux image so it should be all set up and ready to go as soon as you switch the machine on. If you do want to manually install ePython then execute *make* and then *sudo make install* at the command line. Importantly you will then need to start a new bash session (either log out and log back in, or execute *bash* at the command line. 8 | 9 | For more information about installing ePython refer [here](docs/tutorial1.md), for upgrading ePython refer [here](docs/installupgrade.md) 10 | 11 | ## Hello world 12 | Create a file called hello.py: 13 | 14 | ```python 15 | print "Hello world" 16 | ``` 17 | 18 | Now execute *epython hello.py* , each core will display the Hello world message on the screen. This is an example of running code directly on the Epiphany cores and more information can be found [here](docs/tutorial1.md) 19 | 20 | You can also use ePython to offload kernels to the Epiphany and use it as an accelerator. For instance create a file called hello2.py: 21 | 22 | ```python 23 | from epython import offload 24 | 25 | @offload 26 | def helloworld(): 27 | print "Hello World" 28 | 29 | helloworld() 30 | ``` 31 | 32 | Execute *python hello2.py* and again you will see the Hello world message on the screen. 
This is very different from the previous example, because the code is running via CPython on the host and simply offloading this function (*helloworld*) to each Epiphany core. If you comment out the *offload* directive and rerun you will see the host display the message instead. Take a look at [this tutorial](docs/tutorial6.md) for more information and examples around offloading. 33 | 34 | ## Troubleshooting 35 | 36 | Often these are set by default, but if it complains that it can not find e-gcc or the libraries, then you will need to set these environment variables: 37 | 38 | export PATH=/opt/adapteva/esdk/tools/e-gnu/bin:$PATH 39 | export EPIPHANY_HOME=/opt/adapteva/esdk 40 | 41 | (you might want to place this in your .bashrc file) 42 | 43 | ## More advanced installation 44 | 45 | If you do not install ePython then you can still run epython from the current directory, as ./epython.sh but ensure that epython-device.elf is in the current directory when you run the interpreter. The epython.sh script will detect whether to run as sudo (earlier versions of the parallella OS) or not (later versions.) 46 | 47 | In order to include files (required for parallel functions) you must either run your Python codes in the same directory as the executables (and the modules directory) and/or export the EPYTHONPATH environment variable to point to the modules directory. When including files, by default ePython will search in the current directory, any subdirectory called modules and then the EPYTHONPATH variable, which follows the same syntax as the PATH variable. 48 | 49 | Issuing export EPYTHONPATH=$EPYTHONPATH:`pwd`/modules in the epython directory will set this to point to the current directory. You can also modify your ~/.bashrc file to contain a similar command. 
For offload support you will need to export PYTHONPATH=$PYTHONPATH:`pwd`/modules/fullpython 50 | 51 | ## Rebuilding the parser/lexer 52 | To rebuild the parser and lexer too, then execute *make full* 53 | 54 | ## SREC and ELF 55 | 56 | The device executable is built in both SREC and ELF format, as of 2016 the loading of SREC on the Epiphany is deprecated and will be removed from later SDK releases. You can choose which to load via the -elf and -srec command line arguments. ELF is the default for ePython, apart from old Epiphany SDK versions which support SREC. 57 | -------------------------------------------------------------------------------- /interpreter/basictokens.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef BASICTOKENS_H_ 28 | #define BASICTOKENS_H_ 29 | 30 | #define LET_TOKEN 0x00 31 | #define STOP_TOKEN 0x01 32 | #define OR_TOKEN 0x02 33 | #define AND_TOKEN 0x03 34 | #define EQ_TOKEN 0x04 35 | #define NEQ_TOKEN 0x05 36 | #define LT_TOKEN 0x06 37 | #define GT_TOKEN 0x07 38 | #define LEQ_TOKEN 0x08 39 | #define GEQ_TOKEN 0x09 40 | #define ADD_TOKEN 0x0A 41 | #define SUB_TOKEN 0x0B 42 | #define MUL_TOKEN 0x0C 43 | #define DIV_TOKEN 0x0D 44 | #define MOD_TOKEN 0x0E 45 | #define IDENTIFIER_TOKEN 0x0F 46 | #define REAL_TOKEN 0x10 47 | #define STRING_TOKEN 0x11 48 | #define INTEGER_TOKEN 0x12 49 | #define IF_TOKEN 0x13 50 | #define FOR_TOKEN 0x14 51 | #define GOTO_TOKEN 0x15 52 | #define ARRAYACCESS_TOKEN 0x16 53 | #define ARRAYSET_TOKEN 0x17 54 | #define IFELSE_TOKEN 0x18 55 | #define POW_TOKEN 0x19 56 | #define RETURN_TOKEN 0x1A 57 | #define FNCALL_TOKEN 0x1B 58 | #define RETURN_EXP_TOKEN 0x1C 59 | #define BOOLEAN_TOKEN 0x1D 60 | #define LETNOALIAS_TOKEN 0x1E 61 | #define NONE_TOKEN 0x1F 62 | #define IS_TOKEN 0x20 63 | #define ARRAY_TOKEN 0x21 64 | #define NOT_TOKEN 0x22 65 | #define NATIVE_TOKEN 0x23 66 | #define FN_ADDR_TOKEN 0x24 67 | #define FNCALL_BY_VAR_TOKEN 0x25 68 | #define REFERENCE_TOKEN 0x26 69 | #define SYMBOL_TOKEN 0x27 70 | #define ALIAS_TOKEN 0x28 71 | 72 | #define ERR_STR_ONLYTEST_EQ 0x00 73 | #define ERR_NONE_ONLYTEST_EQ 0x01 74 | #define ERR_ONLY_ADDITION_STR 0x02 75 | #define 
ERR_TOO_MANY_ARR_INDEX 0x03 76 | #define ERR_NEG_ARR_INDEX 0x04 77 | #define ERR_ARR_INDEX_EXCEED_SIZE 0x05 78 | #define ERR_ONLY_DISPLAY_STR_WITH_INPUT 0x06 79 | #define ERR_OUT_OF_SHARED_HEAP_MEM 0x07 80 | #define ERR_OUT_OF_CORE_SHARED_HEAP_MEM 0x08 81 | #define ERR_OUT_OF_SHARED_STACK_MEM 0x09 82 | #define ERR_OUT_OF_CORE_SHARED_STACK_MEM 0x0A 83 | #define ERR_ONLY_SEND_INT_AND_REAL 0x0B 84 | #define ERR_SEND_TO_UNKNOWN_CORE 0x0C 85 | #define ERR_SEND_TO_INACTIVE_CORE 0x0D 86 | #define ERR_RECV_FROM_UNKNOWN_CORE 0x0E 87 | #define ERR_RECV_FROM_INACTIVE_CORE 0x0F 88 | #define ERR_SENDRECV_WITH_UNKNOWN_CORE 0x10 89 | #define ERR_FREE_ON_NON_HEAP 0x11 90 | #define ERR_INCORRECT_NUM_NATIVE_PARAMS 0x12 91 | #define ERR_UNKNOWN_NATIVE_COMMAND 0x13 92 | #define ERR_FNCALL_VAR_NOT_CONTAINING_FN_PTR 0x14 93 | #define ERR_PROBE_NOT_SUPPORTED 0x15 94 | #define ERR_NBSEND_NOT_SUPPORTED 0x16 95 | 96 | #define NATIVE_FN_RTL_ISHOST 0x00 97 | #define NATIVE_FN_RTL_ISDEVICE 0x01 98 | #define NATIVE_FN_RTL_PRINT 0x02 99 | #define NATIVE_FN_RTL_NUMDIMS 0x03 100 | #define NATIVE_FN_RTL_DSIZE 0x04 101 | #define NATIVE_FN_RTL_INPUT 0x05 102 | #define NATIVE_FN_RTL_INPUTPRINT 0x06 103 | #define NATIVE_FN_RTL_SYNC 0x07 104 | #define NATIVE_FN_RTL_GC 0x08 105 | #define NATIVE_FN_RTL_FREE 0x09 106 | #define NATIVE_FN_RTL_SEND 0x0A 107 | #define NATIVE_FN_RTL_RECV 0x0B 108 | #define NATIVE_FN_RTL_SENDRECV 0x0C 109 | #define NATIVE_FN_RTL_BCAST 0x0D 110 | #define NATIVE_FN_RTL_NUMCORES 0x0E 111 | #define NATIVE_FN_RTL_COREID 0x0F 112 | #define NATIVE_FN_RTL_REDUCE 0x11 113 | #define NATIVE_FN_RTL_ALLOCARRAY 0x12 114 | #define NATIVE_FN_RTL_ALLOCSHAREDARRAY 0x13 115 | #define NATIVE_FN_RTL_MATH 0x14 116 | #define NATIVE_FN_RTL_PROBE_FOR_MESSAGE 0x15 117 | #define NATIVE_FN_RTL_TEST_FOR_SEND 0x16 118 | #define NATIVE_FN_RTL_WAIT_FOR_SEND 0x17 119 | #define NATIVE_FN_RTL_SEND_NB 0x18 120 | #define NATIVE_FN_RTL_GLOBAL_REFERENCE 0x19 121 | #define NATIVE_FN_RTL_DEREFERENCE 0x1A 122 | #define 
NATIVE_FN_RTL_FLATTEN 0x1B 123 | #define NATIVE_FN_RTL_ARRAYCOPY 0x1C 124 | 125 | #endif /* BASICTOKENS_H_ */ 126 | -------------------------------------------------------------------------------- /docs/tutorial1.md: -------------------------------------------------------------------------------- 1 | # Installing and getting to grips with ePython 2 | 3 | Programming the Epiphany chip is actually very simple, and in this walk through we will be using an Epiphany version of the Python programming language (ePython) as our technology. Using ePython you can go from being a complete novice to writing and running your own code on the Epiphany co-processor in 60 seconds. This walk through is intended as an introductory guide, and we will initially discuss installation & configuration of ePython, before looking at some code examples which you can then modify to further explore the concepts. 4 | 5 | ### Installing ePython 6 | ePython, our version of Python, is open source and available at GitHub. You only need to complete these install commands once, once ePython is installed it can be used as many times as you like. Log into your Parallella board as usual and issue the command 7 | 8 | ``` 9 | git clone https://github.com/mesham/epython.git 10 | ``` 11 | 12 | This will copy ePython onto your machine and a directory called epython will have been created. Next we are going to build this by issuing the make command, from the top level directory (the one you issued the git clone into) issue the following two commands: 13 | 14 | ``` 15 | cd epython 16 | make 17 | ``` 18 | 19 | The build process takes around 20-30 seconds and once complete you will be returned to the bash prompt. The next (and last) step is to install ePython to a central location by issuing the two commands below. You will be prompted for your user's password with the first command. 
The second command (bash) starts a new bash terminal which has the python paths correctly set and every time bash starts from now on it will be correctly configured for ePython. 20 | 21 | ``` 22 | sudo make install 23 | bash 24 | ``` 25 | 26 | Congratulations! You have installed ePython and are ready to start programming! 27 | 28 | ### Let's get coding! 29 | 30 | Open a text editor and enter the following code, then save this file as hello.py 31 | 32 | ```python 33 | print "Hello world" 34 | ``` 35 | 36 | Now issue *epython hello.py* and each Epiphany core will display the message "Hello world", along with the ID of that specific core. Well done - you have just run your first program on the Epiphany co-processor, so let's start exploring some more! The *parallel* package provides a number of functions which are useful for parallel codes. We are going to look at the *coreid* and *numcores* functions. Using the text editor, put the following code into your source file *hello.py* and reissue *epython hello.py* 37 | 38 | ```python 39 | import parallel 40 | print "Hello world from core "+coreid()+" out of "+numcores()+" cores" 41 | ``` 42 | 43 | Line one will import the parallel functions (of which *coreid* and *numcores* are members.) Line two then displays a similar message from each core as the first example, but also includes the ID of each core and total number of cores in the output. We don't have to use all Epiphany cores, one can set the number of cores via the *-d* command line argument, for instance *epython -d 5 hello.py* will only run over five Epiphany cores (you should not select a number greater than the number of physical cores.) 44 | 45 | ### You're doing great, let's look at something a bit more advanced 46 | 47 | We have been printing out information about the cores, but we can also use this in other ways too. The first code example in this section will display an even or odd message depending upon the core's id. 
48 | 49 | ```python 50 | import parallel 51 | if coreid() % 2 == 0: 52 | print "Even core" 53 | else: 54 | print "Odd core" 55 | ``` 56 | 57 | Now we are going to put this all together and produce a slightly more complex example. In the following code the first core will request a number from the user (this can be an integer or float.) The *bcast* function is then called (part of the parallel package) to broadcast this number from the root process (in this case 0, the second argument to the *bcast* call) to all other cores. Each core then displays the number it has just received. 58 | 59 | ```python 60 | import parallel 61 | a=0 62 | if coreid()==0: 63 | a=input("Enter a number: ") 64 | a=bcast(a,0) 65 | print "Number is "+a 66 | ``` 67 | 68 | This is an example of a collective communication, where each process collectively works together to produce some final value. Collective communications form a major corner stone of parallel programming and broadcasting values between processes (or Epiphany cores in this case) is a fundamental aspect of many parallel codes running on modern supercomputers. This topic is covered in more depth in the second tutorial. 69 | 70 | ### Summary 71 | 72 | In this walk through we have installed Epiphany Python and then run a few simple, introductory examples to illustrate running codes on the Epiphany co-processor. We have just scratched the surface here and as you can probably imagine, there is far more to explore. ePython comes with complete documentation (doc folder) along with a number of code examples which you can play with. 
73 | -------------------------------------------------------------------------------- /host/stack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * stack.c 3 | * 4 | * Created on: 9 July 2015 5 | * Author: Nick Brown 6 | */ 7 | 8 | #include 9 | #include 10 | #include "stack.h" 11 | 12 | struct stack_t* getNewStack(void) { 13 | struct stack_t* newStack=(struct stack_t*) malloc(sizeof(struct stack_t)); 14 | initStack(newStack); 15 | return newStack; 16 | } 17 | 18 | void initStack(struct stack_t* stack) { 19 | stack->width=INITIAL_STACK_SIZE; 20 | stack->size=0; 21 | stack->data=(void**) malloc(sizeof(void*) * INITIAL_STACK_SIZE); 22 | stack->type=(char*) malloc(sizeof(char) * INITIAL_STACK_SIZE); 23 | } 24 | 25 | int getStackSize(struct stack_t* stack) { 26 | return stack->size; 27 | } 28 | 29 | int pop(struct stack_t* stack) { 30 | if (stack->size > 0) { 31 | int data=*((int*) stack->data[stack->size-1]); 32 | free(stack->data[--stack->size]); 33 | return data; 34 | } 35 | return -1; 36 | } 37 | 38 | void clearStack(struct stack_t* stack) { 39 | stack->size=0; 40 | free(stack->data); 41 | free(stack->type); 42 | stack->width=INITIAL_STACK_SIZE; 43 | } 44 | 45 | char* popIdentifier(struct stack_t* stack) { 46 | if (stack->size > 0) { 47 | return (char*) stack->data[--stack->size]; 48 | } 49 | return NULL; 50 | } 51 | 52 | struct memorycontainer* popExpression(struct stack_t* stack) { 53 | if (stack->size > 0) { 54 | return (struct memorycontainer*) stack->data[--stack->size]; 55 | } 56 | return NULL; 57 | } 58 | 59 | struct identifier_exp* popExpressionIdentifier(struct stack_t* stack) { 60 | if (stack->size > 0) { 61 | return (struct identifier_exp*) stack->data[--stack->size]; 62 | } 63 | return NULL; 64 | } 65 | 66 | struct identifier_exp* getExpressionIdentifierAt(struct stack_t* stack, int index) { 67 | if (stack->size > index) { 68 | return (struct identifier_exp*) stack->data[index]; 69 | } 70 | return NULL; 71 | 
} 72 | 73 | struct memorycontainer* getExpressionAt(struct stack_t* stack, int index) { 74 | if (stack->size > index) { 75 | return (struct memorycontainer*) stack->data[index]; 76 | } 77 | return NULL; 78 | } 79 | 80 | char* getIdentifierAt(struct stack_t* stack, int index) { 81 | if (stack->size > index) { 82 | return (char*) stack->data[index]; 83 | } 84 | return NULL; 85 | } 86 | 87 | int getTopType(struct stack_t* stack) { 88 | if (stack->size > 0) { 89 | return stack->type[stack->size]; 90 | } 91 | return 0; 92 | } 93 | 94 | int getTypeAt(struct stack_t* stack, int index) { 95 | if (stack->size > index) { 96 | return stack->type[index]; 97 | } 98 | return 0; 99 | } 100 | 101 | void push(struct stack_t* stack, int val) { 102 | stack->size++; 103 | if (stack->size >= stack->width) { 104 | stack->width*=2; 105 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 106 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 107 | } 108 | stack->data[stack->size-1]=malloc(sizeof(int)); 109 | stack->type[stack->size-1]=1; 110 | memcpy(stack->data[stack->size-1], &val, sizeof(int)); 111 | } 112 | 113 | void pushIdentifier(struct stack_t* stack, char* val) { 114 | stack->size++; 115 | if (stack->size >= stack->width) { 116 | stack->width*=2; 117 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 118 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 119 | } 120 | stack->data[stack->size-1]=malloc(strlen(val)+1); 121 | stack->type[stack->size-1]=2; 122 | strcpy(stack->data[stack->size-1], val); 123 | } 124 | 125 | void pushIdentifierAssgnExpression(struct stack_t* stack, char* val, struct memorycontainer* exp) { 126 | stack->size++; 127 | if (stack->size >= stack->width) { 128 | stack->width*=2; 129 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 130 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 131 | } 132 | struct 
identifier_exp atom; 133 | atom.identifier=(char*) malloc(strlen(val)+1); 134 | strcpy(atom.identifier, val); 135 | atom.exp=exp; 136 | stack->data[stack->size-1]=malloc(sizeof(struct identifier_exp)); 137 | memcpy(stack->data[stack->size-1], &atom, sizeof(struct identifier_exp)); 138 | stack->type[stack->size-1]=4; 139 | } 140 | 141 | void pushExpression(struct stack_t* stack, struct memorycontainer* exp) { 142 | stack->size++; 143 | if (stack->size >= stack->width) { 144 | stack->width*=2; 145 | stack->data=(void**) realloc(&stack->data, sizeof(void*) * stack->width); 146 | stack->type=(char*) realloc(&stack->type, sizeof(char) * stack->width); 147 | } 148 | stack->data[stack->size-1]=exp; 149 | stack->type[stack->size-1]=3; 150 | } 151 | 152 | int peek(struct stack_t* stack) { 153 | if (stack->size > 0) { 154 | return *((int*) stack->data[stack->size-1]); 155 | } 156 | return -1; 157 | } 158 | -------------------------------------------------------------------------------- /host/misc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include "misc.h" 31 | #include "basictokens.h" 32 | 33 | void errorCheck(int value, char * errorMessage) { 34 | if (value == -1) { 35 | fprintf(stderr, "Error: %s with %d\n", errorMessage, value); 36 | exit(EXIT_FAILURE); 37 | } 38 | } 39 | 40 | char* translateErrorCodeToMessage(unsigned char errorCode) { 41 | char * errorMessage=NULL; 42 | switch (errorCode) { 43 | case ERR_STR_ONLYTEST_EQ: 44 | errorMessage="Can only test for equality with strings"; 45 | break; 46 | case ERR_NONE_ONLYTEST_EQ: 47 | errorMessage="Can only test for equality with none"; 48 | break; 49 | case ERR_ONLY_ADDITION_STR: 50 | errorMessage="Can only perform addition with strings"; 51 | break; 52 | case ERR_TOO_MANY_ARR_INDEX: 53 | errorMessage="Too many array indexes in expression"; 54 | break; 55 | case ERR_NEG_ARR_INDEX: 56 | errorMessage="Not allowed negative array indexes"; 57 | break; 58 | case ERR_ARR_INDEX_EXCEED_SIZE: 59 | errorMessage="Array index in expression exceeds array size in that dimension"; 60 | break; 61 | case ERR_ONLY_DISPLAY_STR_WITH_INPUT: 62 | errorMessage="Can only display strings with input statement"; 63 | break; 64 | case ERR_OUT_OF_SHARED_HEAP_MEM: 65 | errorMessage="Out of shared heap memory for data"; 66 | break; 67 | case ERR_OUT_OF_CORE_SHARED_HEAP_MEM: 68 | errorMessage="Out of core and shared heap memory for data"; 69 | break; 70 | case 
ERR_OUT_OF_SHARED_STACK_MEM: 71 | errorMessage="Out of shared stack memory for data"; 72 | break; 73 | case ERR_OUT_OF_CORE_SHARED_STACK_MEM: 74 | errorMessage="Out of core and shared stack memory for data"; 75 | break; 76 | case ERR_ONLY_SEND_INT_AND_REAL: 77 | errorMessage="Can only send integers and reals between cores"; 78 | break; 79 | case ERR_SEND_TO_UNKNOWN_CORE: 80 | errorMessage="Attempting to send to non-existent or inactive process"; 81 | break; 82 | case ERR_SEND_TO_INACTIVE_CORE: 83 | errorMessage="Attempting to send to inactive core"; 84 | break; 85 | case ERR_RECV_FROM_UNKNOWN_CORE: 86 | errorMessage="Attempting to receive from non-existent or inactive process"; 87 | break; 88 | case ERR_RECV_FROM_INACTIVE_CORE: 89 | errorMessage="Attempting to receive from inactive core"; 90 | break; 91 | case ERR_SENDRECV_WITH_UNKNOWN_CORE: 92 | errorMessage="Attempting to sendrecv with non-existent or inactive process"; 93 | break; 94 | case ERR_FREE_ON_NON_HEAP: 95 | errorMessage="Attempting to free non allocated heap memory"; 96 | break; 97 | case ERR_INCORRECT_NUM_NATIVE_PARAMS: 98 | errorMessage="Incorrect number of parameters provided to native function call"; 99 | break; 100 | case ERR_UNKNOWN_NATIVE_COMMAND: 101 | errorMessage="Unknown native command supplied to runtime library"; 102 | break; 103 | case ERR_FNCALL_VAR_NOT_CONTAINING_FN_PTR: 104 | errorMessage="Function called via a variable but this variable is not pointing to any function"; 105 | break; 106 | case ERR_PROBE_NOT_SUPPORTED: 107 | errorMessage="Message probe and non-blocking send test and wait not supported for communications with virtual cores"; 108 | break; 109 | case ERR_NBSEND_NOT_SUPPORTED: 110 | errorMessage="Non-blocking sends between device and virtual cores on the host are not yet supported"; 111 | break; 112 | } 113 | if (errorMessage != NULL) { 114 | char * msgToRet=(char*) malloc(strlen(errorMessage) + 1); 115 | strcpy(msgToRet, errorMessage); 116 | return msgToRet; 117 | } else 
{ 118 | errorMessage="Unknown error code of"; 119 | char * msgToRet=(char*) malloc(strlen(errorMessage) + 10); 120 | sprintf(msgToRet, "%s 0x%x", errorMessage, errorCode); 121 | return msgToRet; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /host/parser.h: -------------------------------------------------------------------------------- 1 | /* A Bison parser, made by GNU Bison 3.0.4. */ 2 | 3 | /* Bison interface for Yacc-like parsers in C 4 | 5 | Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . */ 19 | 20 | /* As a special exception, you may create a larger work that contains 21 | part or all of the Bison parser skeleton and distribute that work 22 | under terms of your choice, so long as that work isn't itself a 23 | parser generator using the skeleton or a modified version thereof 24 | as a parser skeleton. Alternatively, if you modify or redistribute 25 | the parser skeleton itself, you may (at your option) remove this 26 | special exception, which will cause the skeleton and the resulting 27 | Bison output files to be licensed under the GNU General Public 28 | License without this special exception. 29 | 30 | This special exception was added by the Free Software Foundation in 31 | version 2.2 of Bison. 
*/ 32 | 33 | #ifndef YY_YY_PARSER_H_INCLUDED 34 | # define YY_YY_PARSER_H_INCLUDED 35 | /* Debug traces. */ 36 | #ifndef YYDEBUG 37 | # define YYDEBUG 0 38 | #endif 39 | #if YYDEBUG 40 | extern int yydebug; 41 | #endif 42 | 43 | /* Token type. */ 44 | #ifndef YYTOKENTYPE 45 | # define YYTOKENTYPE 46 | enum yytokentype 47 | { 48 | INTEGER = 258, 49 | REAL = 259, 50 | STRING = 260, 51 | IDENTIFIER = 261, 52 | NEWLINE = 262, 53 | INDENT = 263, 54 | OUTDENT = 264, 55 | DIM = 265, 56 | SDIM = 266, 57 | EXIT = 267, 58 | QUIT = 268, 59 | ELSE = 269, 60 | ELIF = 270, 61 | COMMA = 271, 62 | WHILE = 272, 63 | PASS = 273, 64 | AT = 274, 65 | FOR = 275, 66 | TO = 276, 67 | FROM = 277, 68 | NEXT = 278, 69 | GOTO = 279, 70 | PRINT = 280, 71 | INPUT = 281, 72 | IF = 282, 73 | NATIVE = 283, 74 | ADD = 284, 75 | SUB = 285, 76 | COLON = 286, 77 | DEF = 287, 78 | RET = 288, 79 | NONE = 289, 80 | FILESTART = 290, 81 | IN = 291, 82 | ADDADD = 292, 83 | SUBSUB = 293, 84 | MULMUL = 294, 85 | DIVDIV = 295, 86 | MODMOD = 296, 87 | POWPOW = 297, 88 | FLOORDIVFLOORDIV = 298, 89 | FLOORDIV = 299, 90 | MULT = 300, 91 | DIV = 301, 92 | MOD = 302, 93 | AND = 303, 94 | OR = 304, 95 | NEQ = 305, 96 | LEQ = 306, 97 | GEQ = 307, 98 | LT = 308, 99 | GT = 309, 100 | EQ = 310, 101 | IS = 311, 102 | NOT = 312, 103 | STR = 313, 104 | ID = 314, 105 | SYMBOL = 315, 106 | ALIAS = 316, 107 | LPAREN = 317, 108 | RPAREN = 318, 109 | SLBRACE = 319, 110 | SRBRACE = 320, 111 | TRUE = 321, 112 | FALSE = 322, 113 | ASSGN = 323, 114 | POW = 324 115 | }; 116 | #endif 117 | /* Tokens. 
*/ 118 | #define INTEGER 258 119 | #define REAL 259 120 | #define STRING 260 121 | #define IDENTIFIER 261 122 | #define NEWLINE 262 123 | #define INDENT 263 124 | #define OUTDENT 264 125 | #define DIM 265 126 | #define SDIM 266 127 | #define EXIT 267 128 | #define QUIT 268 129 | #define ELSE 269 130 | #define ELIF 270 131 | #define COMMA 271 132 | #define WHILE 272 133 | #define PASS 273 134 | #define AT 274 135 | #define FOR 275 136 | #define TO 276 137 | #define FROM 277 138 | #define NEXT 278 139 | #define GOTO 279 140 | #define PRINT 280 141 | #define INPUT 281 142 | #define IF 282 143 | #define NATIVE 283 144 | #define ADD 284 145 | #define SUB 285 146 | #define COLON 286 147 | #define DEF 287 148 | #define RET 288 149 | #define NONE 289 150 | #define FILESTART 290 151 | #define IN 291 152 | #define ADDADD 292 153 | #define SUBSUB 293 154 | #define MULMUL 294 155 | #define DIVDIV 295 156 | #define MODMOD 296 157 | #define POWPOW 297 158 | #define FLOORDIVFLOORDIV 298 159 | #define FLOORDIV 299 160 | #define MULT 300 161 | #define DIV 301 162 | #define MOD 302 163 | #define AND 303 164 | #define OR 304 165 | #define NEQ 305 166 | #define LEQ 306 167 | #define GEQ 307 168 | #define LT 308 169 | #define GT 309 170 | #define EQ 310 171 | #define IS 311 172 | #define NOT 312 173 | #define STR 313 174 | #define ID 314 175 | #define SYMBOL 315 176 | #define ALIAS 316 177 | #define LPAREN 317 178 | #define RPAREN 318 179 | #define SLBRACE 319 180 | #define SRBRACE 320 181 | #define TRUE 321 182 | #define FALSE 322 183 | #define ASSGN 323 184 | #define POW 324 185 | 186 | /* Value type. */ 187 | #if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED 188 | 189 | union YYSTYPE 190 | { 191 | #line 22 "epython.y" /* yacc.c:1909 */ 192 | 193 | int integer; 194 | unsigned char uchar; 195 | float real; 196 | struct memorycontainer * data; 197 | char *string; 198 | struct stack_t * stack; 199 | 200 | #line 201 "parser.h" /* yacc.c:1909 */ 201 | }; 202 | 203 | typedef union YYSTYPE YYSTYPE; 204 | # define YYSTYPE_IS_TRIVIAL 1 205 | # define YYSTYPE_IS_DECLARED 1 206 | #endif 207 | 208 | 209 | extern YYSTYPE yylval; 210 | 211 | int yyparse (void); 212 | 213 | #endif /* !YY_YY_PARSER_H_INCLUDED */ 214 | -------------------------------------------------------------------------------- /host/epython.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "parser.h" 3 | #include "stack.h" 4 | 5 | static const unsigned int TAB_WIDTH = 4; 6 | 7 | int line_num = 0, indent_caller; 8 | char * parsing_filename=NULL, *fn_decorator=NULL; 9 | int line_indent=0, fake_outdent_symbol=0; 10 | 11 | extern struct stack_t indent_stack, filenameStack, lineNumberStack; 12 | 13 | void yyget_INTEGER(YYSTYPE*, char*, size_t); 14 | void yyget_REAL (YYSTYPE*, char*, size_t); 15 | void yyget_STRING(YYSTYPE*, char*, size_t); 16 | 17 | #define yyget_IDENTIFIER yyget_STRING 18 | #define SAVE_VALUE(type) \ 19 | { \ 20 | yyget_##type(&yylval, yytext, yyleng); \ 21 | return type; \ 22 | } 23 | 24 | void yyget_INTEGER(YYSTYPE *outval, char *text, size_t len) { 25 | outval->integer = atoi(text); 26 | } 27 | 28 | void yyget_REAL(YYSTYPE *outval, char *text, size_t len) { 29 | outval->real = atof(text); 30 | } 31 | 32 | void yyget_STRING(YYSTYPE *outval, char *text, size_t len) { 33 | outval->string = text; 34 | } 35 | %} 36 | 37 | /* Python indentation handling based upon code at https://github.com/lucasb-eyer/flex-bison-indentation and 38 | http://www.benbarbour.com/implementing-python-style-indention-syntax-using-flex-bison-or-lexyacc */ 39 | 40 | O [0-7] 41 | D [0-9] 42 | NZ 
[1-9] 43 | L [a-zA-Z_] 44 | A [a-zA-Z_0-9] 45 | H [a-fA-F0-9] 46 | HP (0[xX]) 47 | E ([Ee][+-]?{D}+) 48 | P ([Pp][+-]?{D}+) 49 | FS (f|F|l|L) 50 | IS (((u|U)(l|L|ll|LL)?)|((l|L|ll|LL)(u|U)?)) 51 | CP (u|U|L) 52 | SP (u8|u|U|L) 53 | ES (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+)) 54 | WS [ \t\v\n\f] 55 | 56 | STRING_CHARS [[:print:]]{-}[\"] 57 | 58 | %option noyywrap case-insensitive 59 | %x COMMENTS 60 | %x SINGLELINECOMMENT 61 | %x INDENT_MODE 62 | %% 63 | 64 | \<\<\<.*\n { 65 | if (parsing_filename != NULL) { 66 | pushIdentifier(&filenameStack, parsing_filename); 67 | push(&lineNumberStack, line_num); 68 | free(parsing_filename); 69 | } 70 | parsing_filename=(char*) malloc(yyleng-3); 71 | strncpy(parsing_filename, &yytext[3], yyleng-4); 72 | parsing_filename[yyleng-4]='\0'; 73 | line_num=1; 74 | } 75 | \>\>\>.*\n { 76 | if (getStackSize(&filenameStack) > 0) { 77 | parsing_filename=popIdentifier(&filenameStack); 78 | line_num=pop(&lineNumberStack); 79 | } 80 | } 81 | 82 | \/\* {BEGIN(COMMENTS);} 83 | \*\/ {BEGIN(INITIAL);} 84 | \n { ++line_num; } 85 | \n { ++line_num;BEGIN(INITIAL); return NEWLINE; } 86 | . ; 87 | 88 | " " { line_indent++; } 89 | \t { line_indent+=TAB_WIDTH; } 90 | \n { line_indent=0; } 91 | <> { if (peek(&indent_stack) > 0) { 92 | pop(&indent_stack); 93 | if (line_indent < peek(&indent_stack)) { 94 | int i; 95 | unput('\n'); 96 | for (i=0;i. 
{ 106 | if (!fake_outdent_symbol) unput(*yytext); 107 | fake_outdent_symbol=0; 108 | if (line_indent > 0 && line_indent > peek(&indent_stack)) { 109 | push(&indent_stack, line_indent); 110 | BEGIN(indent_caller); 111 | return INDENT; 112 | } else if (line_indent < peek(&indent_stack)) { 113 | pop(&indent_stack); 114 | if (peek(&indent_stack) != -1 && line_indent != peek(&indent_stack)) { 115 | int i; 116 | for(i=0;i"|"!=" return NEQ; 151 | "<=" return LEQ; 152 | ">=" return GEQ; 153 | "<" return LT; 154 | ">" return GT; 155 | "=" return ASSGN; 156 | "==" return EQ; 157 | "^"|"**" return POW; 158 | "," return COMMA; 159 | ":" return COLON; 160 | "+" return ADD; 161 | "-" return SUB; 162 | "*" return MULT; 163 | "/" return DIV; 164 | "%" return MOD; 165 | "//" return FLOORDIV; 166 | "+=" return ADDADD; 167 | "-=" return SUBSUB; 168 | "*=" return MULMUL; 169 | "/=" return DIVDIV; 170 | "%=" return MODMOD; 171 | "**=" return POWPOW; 172 | "//=" return FLOORDIVFLOORDIV; 173 | "[" return SLBRACE; 174 | "]" return SRBRACE; 175 | "(" return LPAREN; 176 | ")" return RPAREN; 177 | "@" return AT; 178 | TRUE return TRUE; 179 | FALSE return FALSE; 180 | DEF return DEF; 181 | RETURN return RET; 182 | NONE return NONE; 183 | ELSE return ELSE; 184 | ELIF return ELIF; 185 | IN return IN; 186 | IS return IS; 187 | WHILE return WHILE; 188 | PASS return PASS; 189 | EXIT return EXIT; 190 | QUIT return QUIT; 191 | FOR return FOR; 192 | TO return TO; 193 | FROM return FROM; 194 | NEXT return NEXT; 195 | GOTO return GOTO; 196 | IF return IF; 197 | PRINT return PRINT; 198 | INPUT return INPUT; 199 | NATIVE return NATIVE; 200 | STR return STR; 201 | ID return ID; 202 | SYMBOL return SYMBOL; 203 | ALIAS return ALIAS; 204 | "#" BEGIN(SINGLELINECOMMENT); 205 | 206 | [:.;] return yytext[0]; 207 | 208 | [a-zA-Z_][a-zA-Z0-9_.]* SAVE_VALUE(IDENTIFIER); 209 | %% 210 | -------------------------------------------------------------------------------- /host/byteassembler.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifndef BYTEASSEMBLER_H_ 28 | #define BYTEASSEMBLER_H_ 29 | 30 | #include 31 | #include "stack.h" 32 | 33 | #define NATIVE_RTL_ISHOST_STR "rtl_ishost" 34 | #define NATIVE_RTL_ISDEVICE_STR "rtl_isdevice" 35 | #define NATIVE_RTL_PRINT_STR "rtl_print" 36 | #define NATIVE_RTL_NUMDIMS_STR "rtl_numdims" 37 | #define NATIVE_RTL_DSIZE_STR "rtl_dsize" 38 | #define NATIVE_RTL_INPUT_STR "rtl_input" 39 | #define NATIVE_RTL_INPUTPRINT_STR "rtl_inputprint" 40 | #define NATIVE_RTL_SYNC_STR "rtl_sync" 41 | #define NATIVE_RTL_GC_STR "rtl_gc" 42 | #define NATIVE_RTL_FREE_STR "rtl_free" 43 | #define NATIVE_RTL_SEND_STR "rtl_send" 44 | #define NATIVE_RTL_RECV_STR "rtl_recv" 45 | #define NATIVE_RTL_SENDRECV_STR "rtl_sendrecv" 46 | #define NATIVE_RTL_BCAST_STR "rtl_bcast" 47 | #define NATIVE_RTL_NUMCORES_STR "rtl_numcores" 48 | #define NATIVE_RTL_COREID_STR "rtl_coreid" 49 | #define NATIVE_RTL_REDUCE_STR "rtl_reduce" 50 | #define NATIVE_RTL_ALLOCATEARRAY_STR "rtl_allocatearray" 51 | #define NATIVE_RTL_ALLOCATESHAREDARRAY_STR "rtl_allocatesharedarray" 52 | #define NATIVE_RTL_MATH_STR "rtl_math" 53 | #define NATIVE_RTL_PROBE_FOR_MESSAGE_STR "rtl_probe" 54 | #define NATIVE_RTL_TEST_FOR_SEND_STR "rtl_test_for_send" 55 | #define NATIVE_RTL_WAIT_FOR_SEND_STR "rtl_wait_for_send" 56 | #define NATIVE_RTL_SEND_NB_STR "rtl_send_nonblocking" 57 | #define NATIVE_RTL_GLOBAL_REFRENCE_STR "rtl_global_reference" 58 | #define NATIVE_RTL_DEREFRENCE_STR "rtl_dereference" 59 | #define NATIVE_RTL_FLATTEN_STR "rtl_flatten" 60 | #define NATIVE_RTL_ARRAY_COPY_STR "rtl_arraycopy" 61 | 62 | extern int line_num; 63 | extern char * fn_decorator; 64 | 65 | // Used for tracking gotos and line numberings (which are resolved once the byte code is assembled) 66 | struct lineDefinition { 67 | char type; 68 | char * name; 69 | int linenumber, currentpoint; 70 | struct lineDefinition * next; 71 | }; 72 | 73 | // Tree node for the current function call and the main entry point 74 | struct 
function_call_tree_node { 75 | int number_of_calls; 76 | char* calledFunctions[256]; 77 | }; 78 | 79 | // A memory container, containing some bytecode, the length of the code and line definitions that relate to it 80 | struct memorycontainer { 81 | unsigned int length; 82 | char * data; 83 | struct lineDefinition * lineDefns; 84 | }; 85 | 86 | // A function definition, containing the function memory and the name of the function 87 | struct functionDefinition { 88 | char * name; 89 | struct memorycontainer * contents; 90 | int numberEntriesInSymbolTable, recursive, number_of_fn_calls, called; 91 | char ** functionCalls; 92 | }; 93 | 94 | void enterFunction(char*); 95 | unsigned short getNumberEntriesInSymbolTable(void); 96 | void setNumberEntriesInSymbolTable(unsigned short); 97 | void appendNewFunctionStatement(char*, struct stack_t*, struct memorycontainer*); 98 | void appendArgument(char*); 99 | struct memorycontainer* appendCallFunctionStatement(char*, struct stack_t*); 100 | struct memorycontainer* appendNativeCallFunctionStatement(char*, struct stack_t*, struct memorycontainer*); 101 | struct memorycontainer* appendReferenceStatement(char*); 102 | struct memorycontainer* appendSymbolStatement(char*); 103 | struct memorycontainer* appendAliasStatement(char*,struct memorycontainer*); 104 | struct memorycontainer* appendGotoStatement(int); 105 | struct memorycontainer* appendWhileStatement(struct memorycontainer*, struct memorycontainer*); 106 | struct memorycontainer* appendForStatement(char *, struct memorycontainer*, struct memorycontainer*); 107 | struct memorycontainer* appendIfStatement(struct memorycontainer*, struct memorycontainer*); 108 | struct memorycontainer* appendIfElseStatement(struct memorycontainer*, struct memorycontainer*, struct memorycontainer*); 109 | struct memorycontainer* appendArraySetStatement(char*, struct stack_t*, struct memorycontainer*); 110 | struct memorycontainer* appendLetStatement(struct memorycontainer*, struct 
memorycontainer*); 111 | struct memorycontainer* appendLetWithOperatorStatement(struct memorycontainer*, struct memorycontainer*, unsigned char); 112 | struct memorycontainer* appendReturnStatement(void); 113 | struct memorycontainer* appendReturnStatementWithExpression(struct memorycontainer*); 114 | struct memorycontainer* appendStopStatement(void); 115 | struct memorycontainer* appendPassStatement(void); 116 | struct memorycontainer* createStringExpression(char*); 117 | struct memorycontainer* createRealExpression(float); 118 | struct memorycontainer* createIntegerExpression(int); 119 | struct memorycontainer* createBooleanExpression(int); 120 | struct memorycontainer* createArrayExpression(struct stack_t*, struct memorycontainer*); 121 | struct memorycontainer* createNoneExpression(void); 122 | struct memorycontainer* createIdentifierExpression(char*,char); 123 | struct memorycontainer* createIdentifierArrayAccessExpression(char*, struct stack_t*); 124 | struct memorycontainer* createNumberExpression(float); 125 | struct memorycontainer* createNotExpression(struct memorycontainer*); 126 | struct memorycontainer* createOrExpression(struct memorycontainer*, struct memorycontainer*); 127 | struct memorycontainer* createAndExpression(struct memorycontainer*, struct memorycontainer*); 128 | struct memorycontainer* createEqExpression(struct memorycontainer*, struct memorycontainer*); 129 | struct memorycontainer* createIsExpression(struct memorycontainer*, struct memorycontainer*); 130 | struct memorycontainer* createNeqExpression(struct memorycontainer*, struct memorycontainer*); 131 | struct memorycontainer* createGtExpression(struct memorycontainer*, struct memorycontainer*); 132 | struct memorycontainer* createLtExpression(struct memorycontainer*, struct memorycontainer*); 133 | struct memorycontainer* createGeqExpression(struct memorycontainer*, struct memorycontainer*); 134 | struct memorycontainer* createLeqExpression(struct memorycontainer*, struct 
memorycontainer*); 135 | struct memorycontainer* createAddExpression(struct memorycontainer*, struct memorycontainer*); 136 | struct memorycontainer* createSubExpression(struct memorycontainer*, struct memorycontainer*); 137 | struct memorycontainer* createMulExpression(struct memorycontainer*, struct memorycontainer*); 138 | struct memorycontainer* createDivExpression(struct memorycontainer*, struct memorycontainer*); 139 | struct memorycontainer* createFloorDivExpression(struct memorycontainer*, struct memorycontainer*); 140 | struct memorycontainer* createModExpression(struct memorycontainer*, struct memorycontainer*); 141 | struct memorycontainer* createPowExpression(struct memorycontainer*, struct memorycontainer*); 142 | void addVariableIfNeeded(char*); 143 | void enterScope(void); 144 | void leaveScope(void); 145 | #endif /* BYTEASSEMBLER_H_ */ 146 | -------------------------------------------------------------------------------- /docs/tutorial5.md: -------------------------------------------------------------------------------- 1 | # Python task farms on the Epiphany 2 | Splitting problems up into tasks and running these concurrently over a number of cores is a popular approach to parallelism. In recent year this has becoming more and more popular and is seen as one of the ways in which parallel codes might be written for future machines with very large numbers of processing cores. In this tutorial we are going to look at this task approach and using ePython it is very simple to send tasks around between the Epiphany cores. 3 | 4 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you though installing ePython and running a simple "hello world" example on the Epiphany cores. 
If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 5 | 6 | ### Remote procedure calls 7 | Remote Procedure Calls (RPC) is where a core will call a function to execute on another core, providing the arguments to that function and then obtaining any results from its execution. In Python functions are known as *first class*, which means that they can be refered to like any other value and even communicated between the Epiphany cores. 8 | 9 | ```python 10 | import parallel 11 | 12 | if (coreid()==0): 13 | send(functionToRun, 1) 14 | send(50, 1) 15 | print recv(1) 16 | elif (coreid()==1): 17 | op=recv(0) 18 | arg=recv(0) 19 | returnVal=op(arg) 20 | send(returnVal, 0) 21 | 22 | def functionToRun(a): 23 | print "Running on core 1 "+a 24 | return a+10 25 | ``` 26 | 27 | In this code core 0 there is a function called *functionToRun*, which takes one argument and returns a value. Core 0 will send this function over to core 1, along with the argument and then await a message back from core 1 which it will then display. Core 1 receives the function (*op*), then receives the argument from core 0 to run this with (into *arg*), it then executes the function and sends back the returned value to core 0. 28 | 29 | In this approach we can communicate any functions, any number of arguments and send returned values back from any cores. However there is a problem with how we have written this, namely the fact that for core 0 this is blocking, i.e. it sits idle and waits for the returned value whilst the function is being executed on core 1. This isn't ideal and not particularly parallel because there might be other functions which core 0 wants to execute on other cores. 
30 | 31 | ### The taskfarm module 32 | We have seen so far that sending functions around is a nice way of executing them on other cores, but if these are possibly going to produce results and send them back we don't want to be stalling and waiting for the results. ePython comes with the *taskfarm* module which provides a way of farming tasks out to other cores and avoiding this issue of blocking for results. So now we are going to rewrite the first example but instead using functions from the *taskfarm* module: 33 | 34 | ```python 35 | import parallel 36 | import taskfarm 37 | 38 | initTaskFarm(0) 39 | 40 | if (coreid()==0): 41 | execFunction(1, functionToRun, 50) 42 | if (testFunctionFinish(1)): 43 | print "The function has executed" 44 | else: 45 | print "The function is still running" 46 | print waitFunctionFinish(1) 47 | shutdownTaskFarm() 48 | else: 49 | worker() 50 | 51 | def functionToRun(a): 52 | print "Running on core 1 "+a 53 | return a+10 54 | ``` 55 | 56 | The first function call *initTaskFarm* will initialise the task farm and the argument determines which core is the "master", i.e. which will instruct other cores to execute what functions. Every core which is not the master (in this case every core whose id is not 0) will call the *worker* function, which waits for either functions to execute (and then executes them) or for the task farm to shut down. Core 0 then calls *execFunction* which instructs the task farm to execute the *functionToRun* function on core 1 (the first argument) with the value of *50*. This *execFunction* call is non-blocking, this means that it will return as soon as the required data has been communicated to core 1 rather than when core 1 has finished executing the function itself. It is possible to determine the progress of a remotely running function on a core via the *testFunctionFinish* call and the *waitFunctionFinish* will wait for function completion on a core and return any returned values from that function. 
The *testFunctionFinish* function just returns true or false (representing whether the remote function has finished executing) so if you use this test, even if this call returns true, then you will still need to call *waitFunctionFinish* to retrieve a returned value. The *shutdownTaskFarm* is called from the master to command the worker cores to shutdown once they have finished running any current functions. 57 | 58 | As an exercise extend this example to run multiple functions over multiple worker cores (hint, if you get stuck then have a look at the [task_farm_example.py](../examples/task_farm_example.py) which illustrates how to do this.) 59 | 60 | ### Master worker 61 | 62 | In the previous section we used the terminology of "master" and "worker", this is a common approach in parallelism where one core (in this case that with the ID provided to the *initTaskFarm* function) is a master, dishing out work to all the other cores which are workers. You can see this in the diagram to the right, where the master sends out tasks and data to the workers which then execute these and send back any results and inform the master they have completed (and hence can accept another task.) Many parallel problems can be split up into this approach of master and worker, we have rewritten the estimation of PI via Monte Carlo example of ([tutorial 2](tutorial2.md)) to instead use tasks, the *taskfarm* module and this general parallelisation strategy of master-worker.
63 | 64 | ```python 65 | import parallel 66 | import taskfarm 67 | import util 68 | from math import pow 69 | from random import random 70 | 71 | initTaskFarm(0) 72 | 73 | if (coreid()==0): 74 | piVal=0.0 75 | for i in range(1,numcores()-1): 76 | execFunction(i, simulateDarts, 1000) 77 | for i in range(1,numcores()-1): 78 | piVal+=waitFunctionFinish(i) 79 | 80 | print piVal/(numcores()-1) 81 | shutdownTaskFarm() 82 | else: 83 | worker() 84 | 85 | def simulateDarts(num_darts): 86 | score=0.0 87 | j=1 88 | while j<=num_darts: 89 | x=random() 90 | y=random() 91 | 92 | if (pow(x,2) + pow(y,2) < 1.0): score+=1 93 | j+=1 94 | return 4.0 * (score/num_darts) 95 | ``` 96 | 97 | In the code core 0 is the master, this then remotely executes the *simulateDarts* function on every other core concurrently (as remember *execFunction* is non-blocking), and then when each function is executing it will block for each remote function to complete in turn (via the *waitFunctionFinish* function) and add the returned value to the running total of PI which is then divided to deduce the final value. 98 | 99 | For reasons of simplicity for the example we are just executing the *simulateDarts* function once on each worker core, if you look at the PI example of ([tutorial 2](tutorial2.md)) in more detail you will see that this works in rounds. As an exercise extend the example to include these rounds so there are multiple function calls on each core. Once you have got a simple version of this working then instead of waiting for every function in each round to complete before moving onto the next round, consider how you might use the *testFunctionFinish* to poll all workers and simply re-assign more work (i.e. calls to the *simulateDarts* function) as they become idle. 100 | 101 | ### Summary 102 | In this tutorial we have looked at the concepts of tasks, remote procedure calls, task farms and master worker.
It is often possible to rewrite many existing parallel codes in terms of distinct tasks which can be executed concurrently and this can form an alternative approach to parallelism. Whilst, for simplicity, we have focused on running a single task many times over all the cores (homogeneous), it is easy to see how one can provide additional functions and run tasks heterogeneously, i.e. very different tasks on different Epiphany cores. This can work well for some work loads and the tasks themselves can become quite complex and irregular, for instance involving communications. 103 | -------------------------------------------------------------------------------- /docs/tutorial6.md: -------------------------------------------------------------------------------- 1 | # Epiphany as an accelerator: offloading Python kernels 2 | 3 | The latest version of ePython makes it possible to take existing Python code and offload specific functions (we tend to call them kernels) to the Epiphany cores. This is really viewing the Epiphany as an accelerator, where codes run on the host (the Parallella) and specific computationally intensive kernels are then offloaded to the accelerator for execution. The good news is that, using ePython, it is super easy to do this! 4 | 5 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 6 | 7 | **Important:** Unlike some other ePython tutorials, all the code snippets here are to be executed under the CPython interpreter (using the *python* command.)
8 | 9 | ### Offloading a Python function 10 | 11 | To offload a Python function onto the Epiphany we first need to import the *epython* module, in the code example below we do this at line one. Next we simply need to decorate each function to be offloaded with the *@offload* decorator. If you run this in any Python interpreter on the Parallella (via the *python* command) then you will see each Epiphany core displays the *Hello World* message and a list of size 16 is displayed, each element with the value of 30. 12 | 13 | ```python 14 | from epython import offload 15 | 16 | @offload 17 | def helloworld(a,b): 18 | print "Hello World" 19 | return a+b 20 | 21 | print helloworld(10, 20) 22 | ``` 23 | 24 | If you comment out line three (the *offload* decorator) and re-run then a single *Hello World* and 30 value is displayed. Without the decorator then the *helloworld* function runs on the Parallella only (what we call the host.) When we offload a function to the Epiphany behind the scenes it will copy your code and function arguments to the Epiphany cores. Once the function has completed then each core will send its return value back (if there is one) to the Parallella host. In this example the return value is 30 (10 plus 20) and the function call provides 16 of these values - one from each Epiphany core. 25 | 26 | ### Non-blocking asynchronous kernel launches 27 | 28 | The previous example was blocking, where execution in Python on the host will stop and wait for the kernel to run to completion on the Epiphany before continuing. We don't always want this, instead it can sometimes be nice to launch kernels on the Epiphany, then whilst these run go and do something else before grabbing the results sometime later. 
29 | 30 | ```python 31 | from epython import offload 32 | 33 | @offload(async=True) 34 | def helloworld(a,b): 35 | print "Hello World" 36 | return a+b 37 | 38 | handler=helloworld(10, 20) 39 | print handler.wait() 40 | ``` 41 | 42 | In the code example above we have added the argument *async=True* to the *offload* decorator, which tells ePython to launch this function in an asynchronous, non-blocking, manner. Instead of returning the values directly from the function call (at line 8) a handler is returned which can be used to track function execution. At line 9 we are telling Python to wait upon handler completion, which will return the actual returned values from each kernel on the Epiphanies. It is also possible to use the *wait_any* call to wait for any return value (and potentially use this whilst other cores complete) as well as the *test* call which will return whether at least one kernel has completed and made its return value available. 43 | 44 | What if you launch multiple kernels without waiting for previous ones to complete? That's absolutely fine as ePython contains a scheduler which will queue up kernel launches until the Epiphany cores are free to execute them. 45 | 46 | ### Running on a subset of the Epiphany cores 47 | 48 | Up until this point we have executed our kernel on all the Epiphany cores, but often you want to limit to a subset of the cores instead. Using arguments to the *offload* directive we can instruct ePython how many and/or what cores to run on. 49 | 50 | ```python 51 | from epython import offload 52 | 53 | @offload(auto=4) 54 | def helloworld(a,b): 55 | print "Hello World" 56 | return a+b 57 | 58 | print helloworld(10, 20) 59 | ``` 60 | 61 | In this example we have added the *auto* argument to the *offload* directive, this tells ePython to run over 4 cores - but you don't care which cores these are so to best select exactly which cores to run over (i.e. idle cores.)
Instead of *auto* you can use *target*, for instance *target=[1,5,8]* which will explicitly run the kernel only on cores 1, 5 and 8. 62 | 63 | ```python 64 | from epython import offload 65 | 66 | @offload 67 | def helloworld(a,b): 68 | print "Hello World" 69 | return a+b 70 | 71 | h=helloworld(10, 20, target=[9, 10], async=True) 72 | print h.wait() 73 | ``` 74 | 75 | In the example above we have done things slightly differently - this *helloworld* function will execute asynchronously and on Epiphany cores 9 and 10 only. But we have instructed ePython to do this by arguments to the function call rather than arguments to the *offload* decorator. This provides additional flexibility, you can think of arguments to the specific function call as overriding the options provided to the decorator. For instance here by default *helloworld* will run on all cores in a blocking manner due to the arguments (or lack thereof) to the *offload* decorator. However we have overridden the behaviour just for this one specific kernel launch to execute asynchronously only on Epiphany cores 9 and 10. 76 | 77 | ### Short cuts for offload arguments 78 | 79 | Remembering the offload arguments for common calls can be a bit of a pain - hence we have also introduced the *offload_multiple* and *offload_single* decorators. These can be thought of exactly the same as the *offload* directive, but set up some pre-defined behaviour. The *offload_multiple* decorator will launch kernels in an asynchronous, non-blocking manner, on a subset of cores (the number given by the *cores* argument.) The *offload_single* decorator will launch the kernel in an asynchronous, non-blocking, manner on any single Epiphany core. 
80 | 81 | ```python 82 | from epython import offload_multiple, offload_single, waitAll 83 | 84 | @offload_multiple(cores=8) 85 | def adder(a,b): 86 | return a+b 87 | 88 | @offload_single 89 | def subtractor(a,b): 90 | return a-b 91 | 92 | 93 | h1=adder(10,20) 94 | h2=subtractor(10,20) 95 | print waitAll(h1,h2) 96 | ``` 97 | 98 | In this code snippet we have two functions, an *adder* function that will run over 8 Epiphany cores and a *substractor* function that will run only on one Epiphany core. These are both launched and the *waitAll* ePython call is issued to wait for all provided handlers to complete which also returns the kernel values from the Epiphany. 99 | 100 | ### Putting it all together to find PI 101 | 102 | Back in [tutorial 2](tutorial2.md) we ran a code directly on the Epiphany cores through ePython to find the value of PI using the dartboard method. We can modify this code to instead be executed from CPython, with the computational kernel offloaded to the Epiphany cores. 103 | 104 | ```python 105 | from epython import offload 106 | 107 | @offload 108 | def findPI(darts, rounds): 109 | from random import random 110 | from math import pow 111 | mypi=0.0 112 | i=1 113 | while i<=rounds: 114 | score=0.0 115 | j=1 116 | while j<=darts: 117 | x=random() 118 | y=random() 119 | if (pow(x,2) + pow(y,2) < 1.0): 120 | score+=1 121 | j+=1 122 | mypi=mypi+4.0 * (score/darts) 123 | i+=1 124 | return mypi 125 | 126 | pi=sum(findPI(100,10)) 127 | print "Value of PI is "+str((pi/10)/16) 128 | ``` 129 | 130 | In this code the *findPI* function will run on each Epiphany core - you can see that we are also importing specific module functions in this kernel too to provide us with *random* from the *random* module and the *pow* function from the *math* module (lines 5 and 6.) As an exercise, if you comment out the offload directive (line 3) and replace the last two lines with *print findPI(100,10)/10* then this will run on the host (the Parallella) in CPython only. 
131 | 132 | ### Summary 133 | 134 | In this tutorial we have looked at offloading specific functions (we often call then kernels) in an existing Python code onto the Epiphany. This is really useful, not least because ePython only supports a subset of the Python language - so being able to offload the computational kernels whilst keeping everything else unchanged on the host can make things far easier. 135 | 136 | However this is not quite the full story! What kills performance is copying data to and from an accelerator (i.e. arguments to and return values from the Epiphany kernels.) In the [next tutorial](tutorial7.md) we look at other, data focused, calls to allow us to declare accelerator resident data which kernels can then use without having to copy that data to and from the Epiphany continually. 137 | -------------------------------------------------------------------------------- /host/epython.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "byteassembler.h" 3 | #include "memorymanager.h" 4 | #include "stack.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | extern int line_num; 11 | extern char * parsing_filename; 12 | extern char * fn_decorator; 13 | void yyerror(char const*); 14 | int yylex(void); 15 | 16 | void yyerror (char const *msg) { 17 | fprintf(stderr, "%s at line %d of file %s\n", msg, line_num, parsing_filename); 18 | exit(0); 19 | } 20 | %} 21 | 22 | %union { 23 | int integer; 24 | unsigned char uchar; 25 | float real; 26 | struct memorycontainer * data; 27 | char *string; 28 | struct stack_t * stack; 29 | } 30 | 31 | %token INTEGER 32 | %token REAL 33 | %token STRING IDENTIFIER 34 | 35 | %token NEWLINE INDENT OUTDENT 36 | %token DIM SDIM EXIT QUIT ELSE ELIF COMMA WHILE PASS AT 37 | %token FOR TO FROM NEXT GOTO PRINT INPUT 38 | %token IF NATIVE 39 | 40 | %token ADD SUB COLON DEF RET NONE FILESTART IN ADDADD SUBSUB MULMUL DIVDIV MODMOD POWPOW FLOORDIVFLOORDIV FLOORDIV 41 | 
%token MULT DIV MOD AND OR NEQ LEQ GEQ LT GT EQ IS NOT STR ID SYMBOL ALIAS 42 | %token LPAREN RPAREN SLBRACE SRBRACE TRUE FALSE 43 | 44 | %left ADD SUB ADDADD SUBSUB 45 | %left MULT DIV MOD MULMUL DIVDIV MODMOD 46 | %left AND OR 47 | %left NEQ LEQ GEQ LT GT EQ IS ASSGN 48 | %right NOT 49 | %right POW POWPOW FLOORDIVFLOORDIV FLOORDIV 50 | 51 | %type ident declareident fn_entry 52 | %type unary_operator 53 | %type opassgn 54 | %type constant expression logical_or_expression logical_and_expression equality_expression relational_expression additive_expression multiplicative_expression value statement statements line lines codeblock elifblock identscalararray identscalararraylhs 55 | %type fndeclarationargs fncallargs commaseparray arrayaccessor 56 | 57 | %start program 58 | 59 | %% 60 | 61 | program : lines { compileMemory($1); } 62 | 63 | lines 64 | : line 65 | | lines line { $$=concatenateMemory($1, $2); } 66 | ; 67 | 68 | line 69 | : statements NEWLINE { $$ = $1; } 70 | | statements { $$ = $1; } 71 | | NEWLINE { $$ = NULL; } 72 | ; 73 | 74 | statements 75 | : statement statements { $$=concatenateMemory($1, $2); } 76 | | statement 77 | ; 78 | 79 | statement 80 | : FOR declareident IN expression COLON codeblock { $$=appendForStatement($2, $4, $6); leaveScope(); } 81 | | WHILE expression COLON codeblock { $$=appendWhileStatement($2, $4); } 82 | | IF expression COLON codeblock { $$=appendIfStatement($2, $4); } 83 | | IF expression COLON codeblock ELSE COLON codeblock { $$=appendIfElseStatement($2, $4, $7); } 84 | | IF expression COLON codeblock elifblock { $$=appendIfElseStatement($2, $4, $5); } 85 | | IF expression COLON statements { $$=appendIfStatement($2, $4); } 86 | | ELIF expression COLON codeblock { $$=appendIfStatement($2, $4); } 87 | | identscalararraylhs ASSGN expression { $$=appendLetStatement($1, $3); } 88 | | identscalararray opassgn expression { $$=appendLetWithOperatorStatement($1, $3, $2); } 89 | | PRINT expression { 
$$=appendNativeCallFunctionStatement("rtl_print", NULL, $2); } 90 | | EXIT LPAREN RPAREN{ $$=appendStopStatement(); } 91 | | QUIT LPAREN RPAREN{ $$=appendStopStatement(); } 92 | | fn_entry LPAREN fndeclarationargs RPAREN COLON codeblock { appendNewFunctionStatement($1, $3, $6); leaveScope(); $$ = NULL; } 93 | | RET { $$ = appendReturnStatement(); } 94 | | RET expression { $$ = appendReturnStatementWithExpression($2); } 95 | | ident LPAREN fncallargs RPAREN { $$=appendCallFunctionStatement($1, $3); } 96 | | NATIVE ident LPAREN fncallargs RPAREN { $$=appendNativeCallFunctionStatement($2, $4, NULL); } 97 | | PASS { $$=appendPassStatement(); } 98 | | AT ident { fn_decorator=(char*) malloc(strlen($2)+1); strcpy(fn_decorator, $2); $$ = NULL; } 99 | | ALIAS LPAREN ident COMMA expression RPAREN { $$=appendAliasStatement($3, $5); } 100 | ; 101 | 102 | arrayaccessor 103 | : SLBRACE expression SRBRACE { $$=getNewStack(); pushExpression($$, $2); } 104 | | arrayaccessor SLBRACE expression SRBRACE { pushExpression($1, $3); } 105 | ; 106 | 107 | fncallargs 108 | : /*blank*/ { $$=getNewStack(); } 109 | | expression { $$=getNewStack(); pushExpression($$, $1); } 110 | | fncallargs COMMA expression { pushExpression($1, $3); $$=$1; } 111 | ; 112 | 113 | fndeclarationargs 114 | : /*blank*/ { enterScope(); $$=getNewStack(); } 115 | | ident { $$=getNewStack(); enterScope(); pushIdentifier($$, $1); appendArgument($1); } 116 | | ident ASSGN expression { $$=getNewStack(); enterScope(); pushIdentifierAssgnExpression($$, $1, $3); appendArgument($1); } 117 | | fndeclarationargs COMMA ident { pushIdentifier($1, $3); $$=$1; appendArgument($3); } 118 | | fndeclarationargs COMMA ident ASSGN expression { pushIdentifierAssgnExpression($1, $3, $5); $$=$1; appendArgument($3); } 119 | ; 120 | 121 | fn_entry 122 | : DEF ident { enterFunction($2); $$=$2; } 123 | ; 124 | 125 | codeblock 126 | : NEWLINE indent_rule lines outdent_rule { $$=$3; } 127 | 128 | indent_rule 129 | : INDENT { enterScope(); } 130 | 
131 | outdent_rule 132 | : OUTDENT { leaveScope(); } 133 | 134 | opassgn 135 | : ADDADD { $$=0; } 136 | | SUBSUB { $$=1; } 137 | | MULMUL { $$=2; } 138 | | DIVDIV { $$=3; } 139 | | MODMOD { $$=4; } 140 | | POWPOW { $$=5; } 141 | | FLOORDIVFLOORDIV { $$=6; } 142 | 143 | declareident 144 | : ident { $$=$1; enterScope(); addVariableIfNeeded($1); } 145 | ; 146 | 147 | elifblock 148 | : ELIF expression COLON codeblock { $$=appendIfStatement($2, $4); } 149 | | ELIF expression COLON codeblock ELSE COLON codeblock { $$=appendIfElseStatement($2, $4, $7); } 150 | | ELIF expression COLON codeblock elifblock { $$=appendIfElseStatement($2, $4, $5); } 151 | ; 152 | 153 | expression 154 | : logical_or_expression { $$=$1; } 155 | | NOT logical_or_expression { $$=createNotExpression($2); } 156 | ; 157 | 158 | logical_or_expression 159 | : logical_and_expression { $$=$1; } 160 | | logical_or_expression OR logical_and_expression { $$=createOrExpression($1, $3); } 161 | 162 | logical_and_expression 163 | : equality_expression { $$=$1; } 164 | | logical_and_expression AND equality_expression { $$=createAndExpression($1, $3); } 165 | ; 166 | 167 | equality_expression 168 | : relational_expression { $$=$1; } 169 | | equality_expression EQ relational_expression { $$=createEqExpression($1, $3); } 170 | | equality_expression NEQ relational_expression { $$=createNeqExpression($1, $3); } 171 | | equality_expression IS relational_expression { $$=createIsExpression($1, $3); } 172 | ; 173 | 174 | relational_expression 175 | : additive_expression { $$=$1; } 176 | | relational_expression GT additive_expression { $$=createGtExpression($1, $3); } 177 | | relational_expression LT additive_expression { $$=createLtExpression($1, $3); } 178 | | relational_expression LEQ additive_expression { $$=createLeqExpression($1, $3); } 179 | | relational_expression GEQ additive_expression { $$=createGeqExpression($1, $3); } 180 | ; 181 | 182 | additive_expression 183 | : multiplicative_expression { $$=$1; } 184 | 
| additive_expression ADD multiplicative_expression { $$=createAddExpression($1, $3); } 185 | | additive_expression SUB multiplicative_expression { $$=createSubExpression($1, $3); } 186 | ; 187 | 188 | multiplicative_expression 189 | : value { $$=$1; } 190 | | multiplicative_expression MULT value { $$=createMulExpression($1, $3); } 191 | | multiplicative_expression DIV value { $$=createDivExpression($1, $3); } 192 | | multiplicative_expression FLOORDIV value { $$=createFloorDivExpression($1, $3); } 193 | | multiplicative_expression MOD value { $$=createModExpression($1, $3); } 194 | | multiplicative_expression POW value { $$=createPowExpression($1, $3); } 195 | | STR LPAREN expression RPAREN { $$=$3; } 196 | | SLBRACE commaseparray SRBRACE { $$=createArrayExpression($2, NULL); } 197 | | SLBRACE commaseparray SRBRACE MULT value { $$=createArrayExpression($2, $5); } 198 | | INPUT LPAREN RPAREN { $$=appendNativeCallFunctionStatement("rtl_input", NULL, NULL); } 199 | | INPUT LPAREN expression RPAREN { $$=appendNativeCallFunctionStatement("rtl_inputprint", NULL, $3); } 200 | ; 201 | 202 | commaseparray 203 | : expression { $$=getNewStack(); pushExpression($$, $1); } 204 | | commaseparray COMMA expression { pushExpression($1, $3); } 205 | ; 206 | 207 | value 208 | : constant { $$=$1; } 209 | | LPAREN expression RPAREN { $$=$2; } 210 | | identscalararray { $$=$1; } 211 | | ident LPAREN fncallargs RPAREN { $$=appendCallFunctionStatement($1, $3); } 212 | | NATIVE ident LPAREN fncallargs RPAREN { $$=appendNativeCallFunctionStatement($2, $4, NULL); } 213 | | ID LPAREN ident RPAREN { $$=appendReferenceStatement($3); } 214 | | SYMBOL LPAREN ident RPAREN { $$=appendSymbolStatement($3); } 215 | ; 216 | 217 | identscalararray 218 | : ident { $$=createIdentifierExpression($1, 0); } 219 | | ident arrayaccessor { $$=createIdentifierArrayAccessExpression($1, $2); } 220 | ; 221 | 222 | identscalararraylhs 223 | : ident { $$=createIdentifierExpression($1, 1); } 224 | | ident 
arrayaccessor { $$=createIdentifierArrayAccessExpression($1, $2); } 225 | 226 | ident 227 | : IDENTIFIER { $$ = malloc(strlen($1)+1); strcpy($$, $1); } 228 | ; 229 | 230 | constant 231 | : INTEGER { $$=createIntegerExpression($1); } 232 | | REAL { $$=createRealExpression($1); } 233 | | unary_operator INTEGER { $$=createIntegerExpression($1 * $2); } 234 | | unary_operator REAL { $$=createRealExpression($1 * $2); } 235 | | STRING { $$=createStringExpression($1); } 236 | | TRUE { $$=createBooleanExpression(1); } 237 | | FALSE { $$=createBooleanExpression(0); } 238 | | NONE { $$=createNoneExpression(); } 239 | ; 240 | 241 | unary_operator 242 | : ADD { $$ = 1; } 243 | | SUB { $$ = -1; } 244 | ; 245 | 246 | %% 247 | -------------------------------------------------------------------------------- /docs/tutorial7.md: -------------------------------------------------------------------------------- 1 | # Epiphany as an accelerator: managing device data 2 | 3 | In [tutorial 6](tutorial6.md) we looked at using the *offload* decorator on functions (or kernels) to execute them on Epiphany cores. So far we have assumed that data is copied in (via function arguments) for each kernel execution and then copied back to the host (via the return value) once code has finished executing on the Epiphany. Transfering data from host to device and back again is actually really expensive - in the world of GPUs you need to be really careful that the cost of data transfer does not outweigh the computational benefits of the accelertor. 4 | 5 | In addition to offloading functions, it is also possible to define and manage what we call device resident data - i.e. variables that are allocated on each Epiphany core and stay in memory between kernels runs. 
6 | 7 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 8 | 9 | **Important:** Unlike some other ePython tutorials, all the code snippets here are to be executed under the CPython interpreter (using the *python* command.) 10 | 11 | ### Defining device resident data 12 | 13 | The *epython* module contains a function, *define_on_device* which will define any variable with its current state on each Epiphany core. For instance in the code snippet below the variable *a* has been declared on the host to be an array of size 10. This is then declared on each Epiphany core and each core has its own, private, copy of this variable. These copies are entirely independent and for instance there is nothing stopping the programmer changing the structure or values of the arrays on some of the cores or the host. Once you have defined some data on the Epiphanies these variables are entirely independent of each other, for instance changes to variable *a* on Epiphany core 0 will have no impact on any other cores or the host copy of the data. 14 | 15 | ```python 16 | from epython import offload, define_on_device, copy_from_device 17 | 18 | a=[0]*10 19 | 20 | define_on_device(a) 21 | 22 | @offload 23 | def updateA(i): 24 | from parallel import coreid 25 | a[i]=i * coreid() 26 | 27 | for i in range(10): 28 | updateA(i) 29 | 30 | print copy_from_device("a") 31 | ``` 32 | 33 | In this code snippet the host is launching the *updateA* kernel on each Epiphany core 10 times.
For each kernel launch the array index is passed in and the Epiphany core will set that location in its copy of variable *a* to be *i* multiplied by the ID of the core. Most importantly you can see that variable *a* stays resident on the Epiphany cores between calls of the kernel, so we don't need to pay the penalty of copying this variable to and from the cores on every kernel launch. 34 | 35 | At the end of the code the *copy_from_device* is issued on the host to copy the device resident data held in variable *a* on every core back to the host. The host then displays this - it will display 16 arrays each of size 10 elements as there are 16 copies (one per core) of this data. 36 | 37 | You can see with the *copy_from_device* function, we are referring to the variable *a* by a string of its name (i.e. *"a"* rather than *a*.) 38 | 39 | ### Updating data on the device from the host 40 | 41 | The code snippet in this section below illustrates the *copy_to_device* function, where again we define the variable *a* on all Epiphany cores as well as the host. This is first updated on each core to be the value *19* by the call to the *updateA* kernel. Then we copy the value 99 into *a* held on each Epiphany core and overwrite the previous value *19* with this. 42 | 43 | ```python 44 | from epython import offload, define_on_device, copy_to_device, copy_from_device 45 | 46 | a=23 47 | 48 | define_on_device(a) 49 | 50 | @offload 51 | def updateA(): 52 | a=19 53 | 54 | print copy_from_device("a") 55 | updateA() 56 | print copy_from_device("a") 57 | copy_to_device("a", 99) 58 | print copy_from_device("a") 59 | ``` 60 | 61 | We have illustrated this with single valued variables (scalars), but these functions can equally be used for arrays too. Copying data from the host to the device can be useful as a code progresses.
This might be updated values from the host after it has done some more processing, or alternatively it might be a general *scratch* space you reuse between kernel calls and the host is copying in some input data for a series of kernel calls it is about to launch on the cores. 62 | 63 | ### Data transfer onto a subset of cores 64 | 65 | Exactly as with kernels, we can use additional arguments to these data transfer functions to run them in an asynchronous, non-blocking, manner and/or perform data transfer on a subset of the cores. The code snippet below is very similar to the previous one, but on line 10 we are only copying values of *a* held on cores 7 and 8 back - so the list that is displayed is of size 2 rather than 16. At line 13 we are only changing the value of *a* to be 99 on cores 1, 5 and 9, all other cores retain the existing value of *19*. 66 | 67 | ```python 68 | from epython import offload, define_on_device, copy_to_device, copy_from_device 69 | 70 | a=23 71 | 72 | define_on_device(a) 73 | 74 | @offload 75 | def updateA(): 76 | a=19 77 | 78 | print copy_from_device("a", target=[7,8]) 79 | updateA() 80 | print copy_from_device("a") 81 | copy_to_device("a", 99, target=[1,5,9]) 82 | print copy_from_device("a") 83 | ``` 84 | 85 | ### Asynchronous data transfer 86 | 87 | So far the data transfer calls we have looked at have been used in a blocking manner. What I mean by this is that the host will stop and block until the data copy (either to or from) the Epiphany core(s) has completed before continuing. This can be really expensive and the host can be wasting time waiting for data transfers to complete rather than getting on with useful work. Of course sometimes you definitely want this blocking behaviour, but equally other times it can be useful to kick off a data transfer and then go and do something else whilst this is in progress.
 88 | 89 | ```python 90 | from epython import offload, define_on_device, copy_to_device, copy_from_device 91 | from random import random 92 | 93 | a=[0]*100 94 | 95 | define_on_device(a) 96 | 97 | for i in range(100): 98 | a[i]=random() 99 | 100 | h=copy_to_device("a", a, async=True, target=range(10)) 101 | 102 | @offload 103 | def add(d1,d2): 104 | return d1+d2 105 | 106 | somevalue=add(1,5, target=[11]) 107 | h.wait() 108 | 109 | h=copy_from_device("a", target=[1], async=True) 110 | 111 | print h.test() 112 | print h.wait() 113 | ``` 114 | 115 | In the code snippet here we define an array of size 100 on each Epiphany core and then fill this up with random numbers on the host. The *copy_to_device* function is issued to update this on cores 0 to 9, but this is done asynchronously so that the call completes immediately without waiting for data to have been physically copied. At this point you MUST NOT update the host variable *a* (until the call has completed) as it is actively copying data in the background. Whilst the copying is ongoing, the *add* function is launched on core 11 and then we use *h.wait()* to wait on the handle *h* which is tracking the asynchronous data copying (at this point you can then change *a* on the device.) 116 | 117 | Similarly at line 20 we asynchronously copy the data held in *a* from Epiphany core 0, completion is tested for at line 22 and waited on (and then displayed) at line 23. You can see that this feels very similar to the way we launch kernels asynchronously and on a subset of the Epiphany cores - this is no accident and actually behind the scenes it uses the exact same mechanism. 118 | 119 | ### Chaining data transfer and kernel launches 120 | 121 | It is often useful to chain data transfers and kernel launches, so that you can asynchronously kick off all the aspects that need to run on the Epiphany (data transfers and kernel runs) at a single point, then go ahead and do other stuff on the host.
Critically, when you do this you need to be confident that the data transfer will complete before the kernel executes. The good news is that in ePython the order of launch is guaranteed to be the order in which operations are scheduled, such that if *A*, *B* and *C* are scheduled to run on a specific Epiphany core then we guarantee that they will execute in that order. 122 | 123 | ```python 124 | from epython import offload, define_on_device, copy_to_device, waitAll 125 | from random import random 126 | 127 | a=[0]*100 128 | 129 | define_on_device(a) 130 | 131 | for i in range(100): 132 | a[i]=random() 133 | 134 | @offload(async=True) 135 | def addAllA(): 136 | i=0 137 | value=0 138 | while i < size(a): 139 | value+=a[i] 140 | i+=1 141 | return value 142 | 143 | h1=copy_to_device("a", a, async=True, target=[1]) 144 | h2=addAllA(target=[0,1]) 145 | print waitAll(h1,h2) 146 | ``` 147 | 148 | The code snippet of this section illustrates this concept, where an asynchronous data copy is started on core 1 (returning the handle *h1*.) Then the *addAllA* kernel is launched on cores 0 and 1 asynchronously and we wait for both handles at line 22. Due to the ordering of launch, ePython guarantees that the *addAllA* kernel will only execute on core 1 once the data transfer has completed to that core. There is no data transfer to core 0, so the kernel will launch immediately and return 0. 149 | 150 | ### Summary 151 | 152 | In this tutorial we have looked at offloading parts of Python codes onto Epiphany cores in more detail. Carefully managing device resident data, often with asynchronous data copying, can provide significant performance benefits over simply copying all data on every kernel launch. For more information you can refer to the Jacobi offload example which uses device resident data. In this example you can see we launch the two kernels for every iteration which is quite slow (we are constantly launching kernels on the Epiphany cores rather than doing computation!)
As an exercise modify the code so that it moves the iteration loop into the kernel, so that kernels only need to launched once for the entire run. 153 | -------------------------------------------------------------------------------- /host/configuration.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "configuration.h" 33 | #ifndef HOST_STANDALONE 34 | #include "device-support.h" 35 | #else 36 | #define TOTAL_CORES 1 37 | #endif 38 | 39 | static void parseCommandLineArguments(struct interpreterconfiguration*, int, char**); 40 | static void parseCoreActiveInfo(struct interpreterconfiguration*, char*); 41 | static int areStringsEqualIgnoreCase(char*, char*); 42 | static void displayHelp(void); 43 | 44 | /** 45 | * Given the command line arguments this will read the configuration and return the configuration structure 46 | * which has the appropriate flags set and contains strings etc 47 | */ 48 | struct interpreterconfiguration* readConfiguration(int argc, char *argv[]) { 49 | int i; 50 | struct interpreterconfiguration* configuration=(struct interpreterconfiguration*) malloc(sizeof(struct interpreterconfiguration)); 51 | configuration->intentActive=(char*) malloc(TOTAL_CORES); 52 | for (i=0;iintentActive[i]=1; 53 | configuration->displayStats=configuration->displayTiming=configuration->forceCodeOnCore= 54 | configuration->forceCodeOnShared=configuration->forceDataOnShared=configuration->displayPPCode=0; 55 | configuration->filename=configuration->compiledByteFilename=configuration->loadByteFilename=configuration->pipedInContents=NULL; 56 | parseCommandLineArguments(configuration, argc, argv); 57 | return configuration; 58 | } 59 | 60 | /** 61 | * Parses command line arguments 62 | */ 63 | static void parseCommandLineArguments(struct interpreterconfiguration* configuration, int argc, char *argv[]) { 64 | if (argc == 1) { 65 | displayHelp(); 66 | exit(0); 67 | } else { 68 | #ifdef HOST_STANDALONE 69 | configuration->hostProcs=1; 70 | #else 71 | configuration->hostProcs=0; 72 | #endif 73 | configuration->coreProcs=0; 74 | configuration->loadElf=1; 75 | configuration->loadSrec=0; 76 | configuration->fullPythonHost=0; 77 | int i, coreplacement=0; 78 | for 
(i=1;idisplayStats=1; 81 | } else if (areStringsEqualIgnoreCase(argv[i], "-pp")) { 82 | configuration->displayPPCode=1; 83 | } else if (areStringsEqualIgnoreCase(argv[i], "-srec")) { 84 | configuration->loadElf=0; 85 | configuration->loadSrec=1; 86 | } else if (areStringsEqualIgnoreCase(argv[i], "-elf")) { 87 | configuration->loadElf=1; 88 | configuration->loadSrec=0; 89 | } else if (areStringsEqualIgnoreCase(argv[i], "-t")) { 90 | configuration->displayTiming=1; 91 | } else if (areStringsEqualIgnoreCase(argv[i], "-fullpython")) { 92 | configuration->fullPythonHost=1; 93 | configuration->hostProcs=1; 94 | } else if (areStringsEqualIgnoreCase(argv[i], "-datashared")) { 95 | configuration->forceDataOnShared=1; 96 | } else if (areStringsEqualIgnoreCase(argv[i], "-codecore")) { 97 | configuration->forceCodeOnCore=1; 98 | } else if (areStringsEqualIgnoreCase(argv[i], "-codeshared")) { 99 | configuration->forceCodeOnShared=1; 100 | } else if (areStringsEqualIgnoreCase(argv[i], "-o")) { 101 | if (i+1 ==argc) { 102 | fprintf(stderr, "When specifying to output compiled bytes then you must provide a filename for this\n"); 103 | exit(0); 104 | } else { 105 | configuration->compiledByteFilename=argv[++i]; 106 | } 107 | } else if (areStringsEqualIgnoreCase(argv[i], "-l")) { 108 | if (i+1 ==argc) { 109 | fprintf(stderr, "When specifying to load from a byte file then you must provide a filename for this\n"); 110 | exit(0); 111 | } else { 112 | configuration->loadByteFilename=argv[++i]; 113 | } 114 | } else if (areStringsEqualIgnoreCase(argv[i], "-help")) { 115 | displayHelp(); 116 | exit(0); 117 | } else if (areStringsEqualIgnoreCase(argv[i], "-h")) { 118 | if (i+1 ==argc) { 119 | fprintf(stderr, "You must provide a number of host processes to use\n"); 120 | exit(0); 121 | } else { 122 | if (coreplacement) { 123 | fprintf(stderr, "Can not specify explicit core placement and have host virtual processes\n"); 124 | exit(0); 125 | } 126 | configuration->hostProcs=atoi(argv[++i]); 127 
| if (configuration->fullPythonHost) configuration->hostProcs++; 128 | } 129 | } else if (areStringsEqualIgnoreCase(argv[i], "-d")) { 130 | if (i+1 ==argc) { 131 | fprintf(stderr, "You must provide a number of device processes to use\n"); 132 | exit(0); 133 | } else { 134 | int j, device_procs=atoi(argv[++i]); 135 | for (j=0;j<16;j++) { 136 | configuration->intentActive[j]=jpipedInContents=argv[++i]; 145 | } 146 | } else if (areStringsEqualIgnoreCase(argv[i], "-c")) { 147 | if (i+1 ==argc) { 148 | fprintf(stderr, "When specifying core placement you must provide arguments\n"); 149 | exit(0); 150 | } else { 151 | if (configuration->hostProcs > 0) { 152 | fprintf(stderr, "Can only specify explicit core placement with no host virtual processes\n"); 153 | exit(0); 154 | } 155 | coreplacement=1; 156 | parseCoreActiveInfo(configuration, argv[++i]); 157 | } 158 | } else { 159 | if (configuration->filename != NULL) { 160 | fprintf(stderr, "Only one filename can be provided, you have suppled '%s' and '%s'\n", configuration->filename, argv[i]); 161 | exit(0); 162 | } else { 163 | configuration->filename=argv[i]; 164 | } 165 | } 166 | } 167 | if (configuration->loadByteFilename == NULL && configuration->filename == NULL && configuration->pipedInContents == NULL) { 168 | fprintf(stderr, "You must supply a file to run as an argument, see -h for details\n"); 169 | exit(0); 170 | } 171 | #ifndef HOST_STANDALONE 172 | for (i=0;i<16;i++) if (configuration->intentActive[i]) configuration->coreProcs++; 173 | #endif 174 | } 175 | } 176 | 177 | /** 178 | * Determines the active cores if the user supplied -c n, can be a single integer, a list, a range or 179 | * all to select all cores 180 | */ 181 | static void parseCoreActiveInfo(struct interpreterconfiguration* configuration, char * info) { 182 | int i; 183 | if (areStringsEqualIgnoreCase(info, "all")) { 184 | for (i=0;i<16;i++) configuration->intentActive[i]=1; 185 | } else { 186 | if (strchr(info, ',') != NULL) { 187 | char vn[5]; 
188 | int s; 189 | for (i=0;i<16;i++) configuration->intentActive[i]=0; 190 | while (strchr(info, ',') != NULL) { 191 | s=strchr(info, ',')-info; 192 | memcpy(vn, info, s); 193 | vn[s]='\0'; 194 | configuration->intentActive[atoi(vn)]=1; 195 | info=strchr(info, ',')+1; 196 | } 197 | configuration->intentActive[atoi(info)]=1; 198 | } else if (strchr(info, ':') != NULL) { 199 | char vn[5]; 200 | int s; 201 | s=strchr(info, ':')-info; 202 | memcpy(vn, info, s); 203 | vn[s]='\0'; 204 | int from=atoi(vn); 205 | int to=atoi(strchr(info, ':')+1); 206 | for (i=0;i<16;i++) { 207 | if (i >= from && i<= to) { 208 | configuration->intentActive[i]=1; 209 | } else { 210 | configuration->intentActive[i]=0; 211 | } 212 | } 213 | } else { 214 | for (i=0;i<16;i++) configuration->intentActive[i]=0; 215 | configuration->intentActive[atoi(info)]=1; 216 | } 217 | } 218 | } 219 | 220 | /** 221 | * Displays the help message with usage information 222 | */ 223 | static void displayHelp() { 224 | printf("Epiphany Python version %s\n", VERSION_IDENT); 225 | printf("epython [arguments] filename\n\nWhere filename is the source code to execute by default on all cores\n\nArguments\n--------\n"); 226 | #ifndef HOST_STANDALONE 227 | printf("-c placement Specify core placement; can be a single id, all, a range (a:b) or a list (a,b,c,d)\n"); 228 | printf("-d processes Specify number of process on the device\n"); 229 | printf("-h processes Specify number of process on the host\n"); 230 | printf("-t Display core run timing information\n"); 231 | printf("-codecore Placement code on each core (default up to %d bytes length)\n", CORE_CODE_MAX_SIZE); 232 | printf("-codeshared Placement code in shared memory (automatic after %d bytes in length)\n", CORE_CODE_MAX_SIZE); 233 | printf("-datashared Data (arrays and strings) stored in shared memory, storage on core is default\n"); 234 | printf("-elf Use ELF device executable\n"); 235 | printf("-srec Use SREC device executable\n"); 236 | #endif 237 | printf("-s 
Display parse statistics\n"); 238 | printf("-pp Display preprocessed code\n"); 239 | printf("-o filename Write out the compiled byte representation of processed Python code and exits (does not run code)\n"); 240 | printf("-l filename Loads from compiled byte representation of code and runs this\n"); 241 | printf("-help Display this help and quit\n"); 242 | } 243 | 244 | /** 245 | * Tests two strings for equality, ignoring the case - this is for case insensitive variable name matching 246 | */ 247 | static int areStringsEqualIgnoreCase(char * s1, char * s2) { 248 | size_t s1_len=strlen(s1), s2_len=strlen(s2), i; 249 | if (s1_len != s2_len) return 0; 250 | for (i=0;i 83 | We are going to estimate the value of PI via the dartboard method, which is an example of a Monte Carlo method (https://en.wikipedia.org/wiki/Monte_Carlo_method.) Basically, imagine we have a dartboard mounted on a wooden backing and the dartboard fits perfectly within this wooden backing as per the diagram. 84 | 85 | If the radius of the dartboard is one, then the area of the board will be PI, as the dartboard fits snugly on the wooden backing then the area of the wood is 4 (2 by 2.) Therefore this means the ratio of the area of the circle to that of the wood is pi/4. If we throw lots of darts at the board then randomly some will land on the board and some on the wooden backing, but by probability the ratio of the number landing on the dartboard vs the number that is thrown will be pi/4. 86 | 87 | Each Epiphany core will simulate the throwing of lots of darts at this dartboard, and by tracking the number which land on the board across all cores we can estimate PI. The more darts which are thrown, the more accurate our approximation of PI. 
88 | 89 | ```python 90 | import parallel 91 | from random import random 92 | from math import pow 93 | 94 | darts=100 95 | rounds=0 96 | mypi=0.0 97 | 98 | if coreid()==0: 99 | rounds=input("Enter the number of rounds: ") 100 | rounds=bcast(rounds, 0) 101 | else: 102 | rounds=bcast(none, 0) 103 | i=1 104 | while i<=rounds: 105 | score=0.0 106 | j=1 107 | while j<=darts: 108 | x=random() 109 | y=random() 110 | 111 | if (pow(x,2) + pow(y,2) < 1.0): 112 | score=score+1 113 | j+=1 114 | mypi=mypi+4.0 * (score/darts) 115 | i+=1 116 | mypi=reduce(mypi, "sum") 117 | if coreid()==0: print "Value of PI="+(mypi/rounds)/numcores() 118 | ``` 119 | 120 | In this code each core works in rounds, throwing *darts* number of darts per round. Initially core 0 requests from the user the number of rounds to run (10 is a good starting number), which is then broadcast amongst the cores at lines 9 and 11. Remember the provided value to a broadcast collective is only relevant on the root core (in this case core 0) - you can see at line 9 that core 0 will broadcast the *rounds* value, which has been inputted by the user, and every other core at line 11 issues the broadcast call with the *none* value, which is Python's way of representing the absence of a value. What we call the computational kernel, the heart of what each core is actually doing, is at lines 13-24 which performs the Monte Carlo method and then at line 25 the values determined at each core are summed together and then displayed at line 26 by core 0. 121 | 122 | By increasing the number of rounds we increase the accuracy of the answer, but the cost is an increase in runtime. You can use the *-t* command line argument to display timing information for each core, for instance *epython -t pi.py*, run with 10, 50, 100 and 500 rounds and you will see the difference (be patient with 500 rounds it takes a few seconds!) 
123 | 124 | As a general note, we have two extremes when classifying parallelism; at one end tightly coupled problems where each core must very extensively communicate with other cores and at the other end embarrassingly parallel problems where very little (if any) communication is needed. Most HPC codes sit somewhere between these extremes and this example is towards the embarrassingly parallel side, because there are only 2 communications (the initial broadcast and final reduction) and importantly there are no communications required in the computational kernel, so each core can just get on with its computational task. Communications add overhead, so it is useful to understand where a parallel code sits on this scale to give an idea of likely performance and scalability. 125 | 126 | ### Summary 127 | 128 | In this tutorial we have used ePython to introduce some of the basic building blocks of parallelism and shown how quick and easy it is to write parallel codes on the Epiphany. The PI example that we looked at is a simple illustration of a Monte Carlo method, many codes running on the latest supercomputers are based around Monte Carlo methods and more generally the ideas of core identification, point to point & collective communications form the basis of the majority of HPC codes. 129 | -------------------------------------------------------------------------------- /docs/tutorial4.md: -------------------------------------------------------------------------------- 1 | # Pipelines on the Epiphany 2 | In the previous tutorial (available [here](tutorial3.md)) we looked at splitting a problem up geometrically. Driven by the decomposition of the data, different parts of the problem ran on different Epiphany cores with these cores often needing to communuicate when a neighbouring value held on another core was required. 
3 | 4 | Whilst geometric decomposition is a very common approach not all problems are suited to being split around the geometry of the data and instead in this tutorial we will look at splitting up a problem based upon the flow of data, known as a pipeline. 5 | 6 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 7 | 8 | ### Pipeline 9 | Data flows into the first stage, some processing is performed on it and a resulting value flows into the next stage, which performs processing and passes it on to the next stage, etc. Once data has been sent from one stage to the next then that stage is ready to receive some more data and start processing that. 10 | 11 | 12 | This is illustrated in the diagram, in a pipeline data only flows one way (here from left to right) and at each stage the data is refined, from its initial "raw" value to the final "finished" value. Ideally you want all stages in the pipeline to be busy, when your program starts it takes some time to fill up the pipeline and at the end the pipeline drains. The simplest approach to a pipeline will map a single stage to a single Epiphany core. 13 | 14 | ### ePython pipeline 15 | Now it's time for an example, based upon a large set of numbers we want to know the percentage of numbers that are contiguous, i.e. where the same numeric value lies one after another. This leads to a pipeline with four stages: 16 |
    17 |   1. Stage 1: Decide the number of data elements (chosen randomly) for that specific sequence.
18 |   2. Stage 2: Based upon the number of elements generate random numbers for each of these.
19 |   3. Stage 3: Sorts the number sequence.
20 |   4. Stage 4: Progresses through the sequence and counts the number of contiguous elements, the percentage of which is output.
21 |
22 | 23 | The input to the entire pipeline is the number of sequences to work on and the output of the pipeline is the percentage of contiguous numbers in that sequence. 24 | 25 | ```python 26 | import parallel 27 | import util 28 | from random import randrange 29 | 30 | data=[0]*510 31 | 32 | if (coreid()==0): 33 | pipelineStageOne(100) 34 | elif (coreid()==1): 35 | pipelineStageTwo() 36 | elif (coreid()==2): 37 | pipelineStageThree() 38 | elif (coreid()==3): 39 | pipelineStageFour() 40 | 41 | def pipelineStageOne(num_items): 42 | for i in range(num_items): 43 | num=randrange(500) + 5 44 | send(num, coreid()+1) 45 | send(-1,coreid()+1) 46 | 47 | def pipelineStageTwo(): 48 | num=0 49 | while num >= 0: 50 | num=recv(coreid()-1) 51 | if num > 0: 52 | i=0 53 | while i < num: 54 | data[i]=randrange(10) 55 | i+=1 56 | send(num, coreid()+1) 57 | if num > 0: send(data, coreid()+1, num) 58 | 59 | def pipelineStageThree(): 60 | num=0 61 | while num >=0: 62 | num=recv(coreid()-1) 63 | if num > 0: 64 | data=recv(coreid()-1, num) 65 | oddSort(data, num) 66 | send(num, coreid()+1) 67 | if num > 0: send(data, coreid()+1, num) 68 | 69 | def pipelineStageFour(): 70 | num=0 71 | num_contig=0.0 72 | total_num=0 73 | while num >=0: 74 | num=recv(coreid()-1) 75 | if num > 0: 76 | total_num+=num 77 | data=recv(coreid()-1, num) 78 | cnum=data[0] 79 | ccount=1 80 | i=0 81 | while i < num: 82 | if (data[i] == cnum): 83 | ccount+=1 84 | else: 85 | num_contig+=ccount 86 | cnum=data[i] 87 | ccount=0 88 | i+=1 89 | chance=(num_contig/total_num)*100 90 | print chance+"% of numbers were contiguous" 91 | ``` 92 | 93 | **This is an illustration of the code, the executable version is here** 94 | 95 | Based upon its core ID, a core will execute a specific pipeline stage function where it waits for data and, once it has received this, will process the data and send results onto the next stage. The *oddSort* function (in the util module) will perform an odd-even sort on the number sequence. 
At the end of the pipeline, stage one will send the value *-1* to stage two, which will then send it along to the next stage and quit. This action is repeated for the other stages and this is known as a sentinel or poison pill, which will shut the pipeline down and this is the common way in which one terminates parallel pipelines. 96 | 97 | So, we now have a pipeline which passes data between the stages and each stage operates on this data. However there is a problem, namely that the amount of work per pipeline stage is very uneven. For instance stage 1 will progress very quickly, whereas stage 3 (the sorting stage) will take much longer and fast stages will be held up by the slower stages. Bear in mind though, that we are only mapping one stage to one Epiphany core, so our current pipeline is only using 4 of the Epiphany cores. Hence we have 12 idle cores and how can we take advantage of these to help address our work imbalance problem and improve performance? 98 | 99 | ### Splitting the pipeline 100 | What we are going to do here is keep stage 1 unique (i.e. on core 0), but then duplicate stages 2, 3 and 4 across all the remaining cores. This is known as a non-linear pipeline and it will look like the diagram here: 101 | 102 | 103 | Importantly this approach keeps all the cores busy and we have further parallelised the problem by adopting this splitting. Not only will each of the four stages operate in parallel, but also multiple cores will be performing the exact same stage work. 104 | 105 | ```python 106 | .....
107 | if (coreid()==0): 108 | pipelineStageOne(100) 109 | else: 110 | if (coreid() % 3 == 1): 111 | pipelineStageTwo() 112 | elif (coreid() % 3 == 2): 113 | pipelineStageThree() 114 | else: 115 | pipelineStageFour() 116 | 117 | def pipelineStageOne(num_items): 118 | matchingpid=1 119 | for i in range(num_items): 120 | num=randrange(500) + 5 121 | send(num, matchingpid) 122 | matchingpid+=3 123 | if matchingpid > 13: matchingpid=1 124 | for i in range(1,13,3): 125 | send(-1,i) 126 | 127 | ..... 128 | ``` 129 | 130 | **This is an illustration of the code, the executable version is here** 131 | 132 | The code is very similar to the previous simple pipeline code, but stage 1 (on core 0) is maintaining a matching core ID, *matchingpid* which is sends to next. This value is increased at each stage and then wrapped around once *matchingpid* reaches over 13. 133 | 134 | Time both the simple and split versions (using the *-t* command line argument for timing information.) You should see quite a significant performance improvement by adopting this splitting approach and taking advantage of the idle cores. 135 | 136 | ### Parallelising a specific stage 137 | It is quite simple really, to improve performance we want to take advantage of the simple pipeline's idle cores. As we have seen one way is by splitting and duplicating stages. The other way is by keeping the stages exactly the same, but instead to parallelise one specific stage. In our example the sorting (stage 3) is the most expensive, so we can concentrate our idle cores onto this stage. 138 | 139 | 140 | 141 | This is illustrated in the diagram, where *Cn* represents the *nth* Epiphany core and you can see that there are 13 cores allocated to stage three. This can work very well when another pattern can easily be adopted within the stage and here we are going to use geometric decomposition, to split the data up amongst these 13 cores and do a parallel sort on it. 142 | 143 | ```python 144 | ..... 
145 | 146 | if (coreid()==0): 147 | pipelineStageOne(10) 148 | elif (coreid()==1): 149 | pipelineStageTwo() 150 | elif (coreid() >= 2 and coreid() <= 14): 151 | pipelineStageThree() 152 | elif (coreid()==15): 153 | pipelineStageFour() 154 | 155 | ..... 156 | 157 | def pipelineStageTwo(): 158 | num=0 159 | while num >= 0: 160 | num=recv(coreid()-1) 161 | j=2 162 | while j<=14: 163 | if num > 0: 164 | i=0 165 | while i < num/13: 166 | data[i]=randrange(1000) 167 | i+=1 168 | send(num/13, j) 169 | send(data, j, num/13) 170 | else: 171 | send(-1, j) 172 | j+=1 173 | 174 | def pipelineStageThree(): 175 | num=0 176 | while num >=0: 177 | num=recv(1) 178 | if num > 0: 179 | data=recv(1, num) 180 | parallel_odd_even_sort(num) 181 | send(num, 15) 182 | if num > 0: send(data, 15, num) 183 | 184 | def pipelineStageFour(): 185 | rdata=[0]*100 186 | num=0 187 | num_contig=0.0 188 | total_num=0 189 | while num >=0: 190 | i=2 191 | while i<=14: 192 | num=recv(i) 193 | if (num > 0): 194 | rdata=recv(i, num) 195 | j=num*i 196 | while j 0: 201 | ..... 202 | chance=(num_contig/total_num)*100 203 | print chance+"% of numbers were contiguous" 204 | ``` 205 | 206 | **This is an illustration of the code, the executable version is here** 207 | 208 | This approach is a bit more complex as, instead of filling in the entire number sequence and passing it along, stage two of the pipeline will complete each subsequence needed for the different cores of stage three and send the specific data to its specific core. The Epiphany cores allocated to stage three then receive their subdata, perform a parallel sort on it (via the *parallel_odd_even_sort* function) and send their values onto stage four which will collate and assemble them in order to perform the final calculation. 209 | 210 | ### Summary 211 | In this tutorial we have looked at pipelines where the parallelism is oriented around the flow of data. 
As it flows through the pipeline's stages, data is refined until we get a final value that is output from the final stage. This approach is suited to many problems, and some that you might not necessarily expect (such as CPU instruction pipelines.) Due to the fast interconnect between the Epiphany cores this approach of streaming data between them is potentially very advantageous - but as we have seen it is really important that each stage is equally busy at all times. If you have an uneven distribution of computation amongst the cores, or lots of idle cores, then splitting the pipeline or parallelising a specific stage can provide a significant gain. 212 | 213 | More information about pipelines can be found here. An example focussing on the ePython sequential odd-even sort algorithm that we used can be found here and the parallel version we used can be found here. 214 | -------------------------------------------------------------------------------- /docs/tutorial3.md: -------------------------------------------------------------------------------- 1 | # Geometric decomposition on the Epiphany 2 | In the previous tutorial (available [here](tutorial2.md)) we concentrated on different ways to pass messages between cores which is one of the core mechanisms of parallelism. We saw that messages can be point to point, where only two cores are involved, or collective where every core is involved. The forms of communication that you select depend upon the problem you are trying to solve, and the example we considered (finding PI via the dartboard method) fitted very well with collective communications. 3 | 4 | This tutorial will build upon tutorial two's mechanisms of parallelism in order to take a higher level view of parallel codes by considering some of the common strategies (also known as patterns) that are available to parallel programmers and widely used. This tutorial will concentrate on geometric decomposition, which is also known as domain decomposition.
5 | 6 | Before going any further, if you have not yet used or installed ePython then it is worth following the first tutorial ([here](tutorial1.md)) which walks you through installing ePython and running a simple "hello world" example on the Epiphany cores. If you installed ePython a while ago then it is worth ensuring that you are running the latest version, instructions for upgrading are available [here](installupgrade.md) 7 | 8 | ### Geometric decomposition 9 | Splitting a problem up geometrically, and allocating different chunks of the data to different cores (or processes) is a very useful technique when there is one key data structure and the major organising principle of parallelism is splitting up of the data itself. In this strategy each core performs (roughly) the same instructions, just operating upon different data. 10 | 11 | 12 | The diagram illustrates geometric decomposition in more detail, where an initially large 2D array of data is split up into four chunks and each chunk is then distributed onto a different core. One of the key decisions for the parallel programmer is that of **granularity**, i.e. how many and how large these chunks should be. Granularity is very important because we want to maximise the amount of computation each core performs whilst minimising the communication between cores (which is an overhead of parallelism.) It is a trade-off, for instance in the diagram we only have four large chunks so only four cores can be utilised and these might have a very significant amount of computation to perform. At the other extreme, if we were to split the data into very many smaller chunks, then the cost of communication will likely dominate because each core only has a small amount of computation but very many cores results in lots of communications and cores are predominantly waiting for these communications to complete.
13 | 14 | ### Jacobi iterative method 15 | We are going to look at an algorithm very commonly used in HPC, namely an iterative method (the Jacobi method) to solve a partial differential equation (PDE.) We will be focussing on Laplace's equation for diffusion, and you can think of a long pipe where we know the value of some pollutant at each end but not throughout the pipe itself. Based upon this pipe and initial values we want to deduce how the pollution diffuses throughout. In order to solve this problem we split the pipe up into a number of distinct cells and set the values at the left most and right most cells (called the boundary conditions.) For every other cell, the value in that cell depends upon the values held in the neighbouring cells - which themselves depend upon their neighbours. The algorithm works in iterations, where each iteration will update all the unknown values and so progresses towards the final answer. At each iteration we calculate the residual which tells us how far away from the answer the current solution is and we will keep iterating until this residual is small enough to match a predetermined termination accuracy. 16 | 17 | ### Halo swapping 18 | In order to parallelise this problem we are going to split up the pipe geometrically and allocate different chunks to different Epiphany cores. I have already mentioned that the value at each cell depends upon its neighbouring cells, this is called the *stencil* and in this case we have a stencil size of one (we only care about the direct neighbour in each direction.) What this tells us is that the majority of our computation will be local, but the calculation for the first and last points in a chunk (held by a core) will require a non-local neighbour's value (held on a different core.)
19 | 20 | 21 | This is illustrated by the diagram, where the top image illustrates a pipe where we are solving 15 unknown pollution elements (empty boxes) and the left most and right most boundary condition (shaded) values are provided. This is then split up into three chunks in the lower illustration, each with 5 elements (empty boxes), and each chunk is allocated on a different core. It can be seen that for each chunk of data, there are actually seven elements - the five empty elements and one shaded on the left and one shaded on the right. These shaded elements are known as halos (or ghosts) and represent the neighbouring value required for the first and last local elements. A halo swap, where cores communicate neighbouring values, is performed en-mass at the start of each iteration and-so when it comes to the computation all the data a core requires is already present. Halo swapping results in fewer, larger messages (which is far more efficient than many smaller messages) and is a very common technique employed by HPC programmers. 22 | 23 | ### ePython code 24 | Now we have looked at some of the fundamental concepts underlying geometric decomposition and our example, it is time to get to the code! 
25 | 26 | ```python 27 | import parallel 28 | from math import sqrt 29 | 30 | DATA_SIZE=100 31 | MAX_ITS=100000 32 | 33 | # Work out the amount of data to hold on this core 34 | local_size=DATA_SIZE/numcores() 35 | if local_size * numcores() != DATA_SIZE: 36 | if (coreid() < DATA_SIZE-local_size*numcores()): local_size=local_size+1 37 | 38 | # Allocate the two arrays (two as this is Jacobi) we +2 to account for halos/boundary conditions 39 | data=[0]*(local_size+2) 40 | data_p1=[0]*(local_size+2) 41 | 42 | # Set the initial conditions 43 | i=0 44 | while i<=local_size+1: 45 | data[i]=0.0 46 | i+=1 47 | 48 | if coreid()==0: data[0]=1.0 49 | if coreid()==numcores()-1: data[local_size+1]=10.0 50 | 51 | # Compute the initial absolute residual 52 | tmpnorm=0.0 53 | i=1 54 | while i<=local_size: 55 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 56 | i+=1 57 | tmpnorm=reduce(tmpnorm, "sum") 58 | bnorm=sqrt(tmpnorm) 59 | 60 | norm=1.0 61 | its=0 62 | while norm >= 1e-4 and its < MAX_ITS: 63 | # Halo swap to my left and right neighbours if I have them 64 | if (coreid() > 0): data[0]=sendrecv(data[1], coreid()-1) 65 | if (coreid() < numcores()-1): data[local_size+1]=sendrecv(data[local_size], coreid()+1) 66 | 67 | # Calculate current residual 68 | tmpnorm=0.0 69 | i=1 70 | while i<=local_size: 71 | tmpnorm=tmpnorm+(data[i]*2-data[i-1]-data[i+1])^2 72 | i+=1 73 | tmpnorm=reduce(tmpnorm, "sum") 74 | norm=sqrt(tmpnorm)/bnorm 75 | 76 | if coreid()==0 and its%1000 == 0: print "RNorm is "+norm+" at "+its+" iterations" 77 | 78 | # Performs the Jacobi iteration for Laplace 79 | i=1 80 | while i<=local_size: 81 | data_p1[i]=0.5* (data[i-1] + data[i+1]) 82 | i+=1 83 | 84 | # Swap local data around for next iteration 85 | i=1 86 | while i<=local_size: 87 | data[i]=data_p1[i] 88 | i+=1 89 | its+=1 90 | 91 | if coreid()==0: print "Completed in "+its+" iterations, RNorm="+norm 92 | ``` 93 | 94 | Copy this into a file named *jacobi.py* and execute *epython jacobi.py* (it is also 
provided in *examples/jacobi.py*), this will execute over all 16 Epiphany cores and you will see something like: 95 | 96 | ``` 97 | [device 0] RNorm is 1.000000 at 0 iterations 98 | [device 0] RNorm is 0.004219 at 1000 iterations 99 | [device 0] RNorm is 0.002365 at 2000 iterations 100 | [device 0] RNorm is 0.001449 at 3000 iterations 101 | [device 0] RNorm is 0.000893 at 4000 iterations 102 | [device 0] RNorm is 0.000552 at 5000 iterations 103 | [device 0] RNorm is 0.000341 at 6000 iterations 104 | [device 0] RNorm is 0.000209 at 7000 iterations 105 | [device 0] RNorm is 0.000129 at 8000 iterations 106 | [device 0] Completed in 8500 iterations, RNorm=0.000100 107 | ``` 108 | 109 | At the top of the code the *DATA_SIZE* variable sets the global length of the pipe (100 in this case) and initially the cores will split up the pipe and determine how much data they hold locally (in *local_size*) before allocating the arrays *data* and *data_p1* to hold their local data. You can see that each core actually allocates *local_size+2* data elements, we have 2 extra elements for the left and right halos. 110 | 111 | The initial absolute residual is then calculated which deduces how far away from the final answer the initial setup lies, each core calculates the local residual and than the *reduce* collective communication call is used to sum these up to a global value. 112 | 113 | At line 35 each core will begin iterating and directly after this the halo swap is performed via the *sendrecv* communication calls which combine both sending to and receiving from a core into one operation. The residual (how far the solution is from the final answer) is calculated for each iteration (again using the *reduce* collective) and this is then taken relative to the initial residual to determine how far the solution has progressed which is one of our termination criteria. 114 | 115 | This Jacobi method, whilst it is the slowest iterative solver, has some nice properties. 
One such property is that, given a fixed global problem size, irrespective of the number of cores you run with the progression towards the final answer at each iteration should be the same. You can display timing information via the *-t* command line argument, time a run with all 16 Epiphany cores and then run it only using 3 (*-d 3* command line argument) cores, the runtime will increase because we have fewer cores doing more work. 116 | 117 | No surprises so far, but remember at the start of the tutorial we discussed the granularity of the decomposition (fewer larger data chunks or many smaller chunks.) With this default pipe length of 100, 3 chunks is too few but actually 16 chunks is too many. Fixing the global problem size and varying the number of cores is an example of **strong scaling**, and running with about 8 Epiphany cores is the optimum. Smaller core counts are slower because computations rule and larger core counts are slower because communication costs rule. Many people assume that simply throwing cores at a problem will speed it up, but as we have seen that is certainly not the case and often beyond a specific optimum (8 cores here), increasing the number of cores will actually slow down your code run.
120 | 121 | ### Summary 122 | In this tutorial we have looked at geometric decomposition (also known as domain decomposition) which is a very common strategy for parallelism when your code is oriented around some key data structure(s) which can easily be split up. Iterative methods are very commonly used on supercomputers for solving systems of linear equations (such as Laplace's PDE here) and Jacobi is one of these methods. Iterative methods, as well as many other computational algorithms, lend themselves towards geometric decomposition and this way of splitting the problem up feels very natural in these cases. 123 | 124 | More information about Geometric decomposition can be found [here](http://parlab.eecs.berkeley.edu/wiki/patterns/geometric_decomposition) and more information about iterative methods can be found [here](http://www.maa.org/press/periodicals/loci/joma/iterative-methods-for-solving-iaxi-ibi-introduction-to-the-iterative-methods) 125 | -------------------------------------------------------------------------------- /host/memorymanager.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Nick Brown 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "memorymanager.h" 32 | 33 | // This is set at the end of parsing to be the entire byte code representation of the users Python program 34 | struct memorycontainer* assembledMemory=NULL; 35 | // This is the function list 36 | struct functionListNode* functionListHead=NULL; 37 | // Exportable view of the functions and their location in the byte code 38 | struct exportableFunctionTableNode* exportableFunctionTable=NULL; 39 | int numberExportableFunctionsInTable=0; 40 | 41 | struct function_call_tree_node mainCodeCallTree; 42 | 43 | static void determineUsedFunctions(void); 44 | static void processUsedFunction(struct functionDefinition*); 45 | static unsigned short findLocationOfLineNumber(struct lineDefinition*, int); 46 | static unsigned short findLocationOfFunctionName(struct lineDefinition*, char*, int, int); 47 | static struct functionDefinition* findFunctionDefinition(char*); 48 | static int doesFunctionAlreadyExistInExportableTable(char*); 49 | 50 | int getNumberOfSymbolEntriesNotUsed(void) { 51 | int ignoreSymbolEntries=0; 52 | struct functionListNode * 
fnHead=functionListHead; 53 | while (fnHead != NULL) { 54 | if (!fnHead->fn->called) ignoreSymbolEntries+=fnHead->fn->numberEntriesInSymbolTable; 55 | fnHead=fnHead->next; 56 | } 57 | return ignoreSymbolEntries; 58 | } 59 | 60 | /** 61 | * Compiles the memory by going through and resolving relative links (i.e. gotos) and adds a stop at the end 62 | */ 63 | void compileMemory(struct memorycontainer* memory) { 64 | int i; 65 | determineUsedFunctions(); 66 | struct memorycontainer* stopStatement=appendStopStatement(); 67 | if (memory != NULL) { 68 | struct memorycontainer* compiledMem=concatenateMemory(memory, stopStatement); 69 | struct functionListNode * fnHead=functionListHead; 70 | while (fnHead != NULL) { 71 | if (fnHead->fn->called) compiledMem=concatenateMemory(compiledMem, fnHead->fn->contents); 72 | if (fnHead->fn->functionCalls != NULL) { 73 | for (i=0;ifn->number_of_fn_calls;i++) { 74 | free(fnHead->fn->functionCalls[i]); 75 | } 76 | free(fnHead->fn->functionCalls); 77 | } 78 | fnHead=fnHead->next; 79 | } 80 | struct lineDefinition * root=compiledMem->lineDefns, *r2; 81 | while (root != NULL) { 82 | if (root->type==1) { 83 | unsigned short lineLocation=findLocationOfLineNumber(compiledMem->lineDefns, root->linenumber); 84 | memcpy(&compiledMem->data[root->currentpoint], &lineLocation, sizeof(unsigned short)); 85 | } else if (root->type==3 || root->type==4 || root->type==2) { 86 | unsigned short lineLocation=findLocationOfFunctionName(compiledMem->lineDefns, root->name, root->linenumber, root->type==4); 87 | if (root->type==3 || root->type==4) { 88 | memcpy(&compiledMem->data[root->currentpoint], &lineLocation, sizeof(unsigned short)); 89 | } 90 | if (!doesFunctionAlreadyExistInExportableTable(root->name)) { 91 | struct exportableFunctionTableNode* newExportableNode=(struct exportableFunctionTableNode*) malloc(sizeof(struct exportableFunctionTableNode)); 92 | newExportableNode->functionLocation=lineLocation; 93 | newExportableNode->functionName=(char*) 
malloc(strlen(root->name)+1); 94 | strcpy(newExportableNode->functionName, root->name); 95 | newExportableNode->next=exportableFunctionTable; 96 | exportableFunctionTable=newExportableNode; 97 | numberExportableFunctionsInTable++; 98 | } 99 | } 100 | root=root->next; 101 | } 102 | // Clear up the memory used for these line definition nodes 103 | root=compiledMem->lineDefns; 104 | while (root != NULL) { 105 | r2=root->next; 106 | free(root); 107 | root=r2; 108 | } 109 | assembledMemory=compiledMem; 110 | } else { 111 | assembledMemory=stopStatement; 112 | } 113 | } 114 | 115 | /** 116 | * Determines whether a specific function of a specific name already exists in the exportable global function table 117 | */ 118 | static int doesFunctionAlreadyExistInExportableTable(char* functionName) { 119 | struct exportableFunctionTableNode* root=exportableFunctionTable; 120 | while (root != NULL) { 121 | if (strcmp(root->functionName, functionName) == 0) return 1; 122 | root=root->next; 123 | } 124 | return 0; 125 | } 126 | 127 | /** 128 | * Determines the used (i.e. called by the code) functions, driven from the main function 129 | */ 130 | static void determineUsedFunctions(void) { 131 | int i; 132 | for (i=0;icalled) processUsedFunction(defn); 136 | } 137 | } 138 | } 139 | 140 | /** 141 | * Marks the current function as used (i.e. 
called from code), if it has not already been processed will then 142 | * go and examine all the called functions from this 143 | */ 144 | static void processUsedFunction(struct functionDefinition* specificFunction) { 145 | specificFunction->called=1; 146 | if (specificFunction->functionCalls != NULL) { 147 | int i; 148 | for (i=0;inumber_of_fn_calls;i++) { 149 | struct functionDefinition* defn=findFunctionDefinition(specificFunction->functionCalls[i]); 150 | if (defn != NULL) { 151 | if (!defn->called) processUsedFunction(defn); 152 | } 153 | } 154 | } 155 | } 156 | 157 | /** 158 | * Adds a function to the function list which are all combined in the compile memory function 159 | */ 160 | void addFunction(struct functionDefinition* functionDefintion) { 161 | struct functionListNode * node=(struct functionListNode*) malloc(sizeof(struct functionListNode)); 162 | node->fn=functionDefintion; 163 | node->next=functionListHead; 164 | functionListHead=node; 165 | } 166 | 167 | static struct functionDefinition* findFunctionDefinition(char * functionName) { 168 | struct functionListNode * node=functionListHead; 169 | while (node != NULL) { 170 | if (strcmp(node->fn->name, functionName) == 0) return node->fn; 171 | node=node->next; 172 | } 173 | return NULL; 174 | } 175 | 176 | int getNumberSymbolTableEntriesForRecursion(void) { 177 | int r=0; 178 | struct functionListNode * fnHead=functionListHead; 179 | while (fnHead != NULL) { 180 | if (fnHead->fn->recursive && fnHead->fn->called) r+=fnHead->fn->numberEntriesInSymbolTable; 181 | fnHead=fnHead->next; 182 | } 183 | return r; 184 | } 185 | 186 | /** 187 | * Given a line number will return the byte location of this in the memory 188 | */ 189 | static unsigned short findLocationOfLineNumber(struct lineDefinition * root, int lineNumber) { 190 | while (root != NULL) { 191 | if (root->type==0 && root->linenumber == lineNumber) return (unsigned short) root->currentpoint; 192 | root=root->next; 193 | } 194 | fprintf(stderr, "Can 
not find line %d in goto\n", lineNumber); 195 | exit(0); 196 | } 197 | 198 | /** 199 | * Finds the location of a function name and returns this or raises an error if the function is not found 200 | */ 201 | static unsigned short findLocationOfFunctionName(struct lineDefinition * root, char * functionName, int line_num_for_error, int isvarorfn) { 202 | while (root != NULL) { 203 | if (root->type==2 && strcmp(root->name, functionName) == 0) return (unsigned short) root->currentpoint; 204 | root=root->next; 205 | } 206 | if (isvarorfn) { 207 | fprintf(stderr, "Can not find variable or function '%s' in assignment at line number %d\n", functionName, line_num_for_error); 208 | } else { 209 | fprintf(stderr, "Can not find function '%s' in function call at line number %d\n", functionName, line_num_for_error); 210 | } 211 | exit(0); 212 | } 213 | 214 | /** 215 | * Concatenates two memory structures together and returns the result of this 216 | */ 217 | struct memorycontainer* concatenateMemory(struct memorycontainer* m1, struct memorycontainer* m2) { 218 | if (m1 == NULL) return m2; 219 | if (m2 == NULL) return m1; 220 | struct memorycontainer* memoryContainer = (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 221 | memoryContainer->length=m1->length + m2->length; 222 | memoryContainer->data=malloc(memoryContainer->length); 223 | memoryContainer->lineDefns=m1->lineDefns; 224 | if (m1->data != NULL && m1->length > 0) memcpy(memoryContainer->data, m1->data, m1->length); 225 | if (m2->data != NULL && m2->length > 0) memcpy(&memoryContainer->data[m1->length], m2->data, m2->length); 226 | struct lineDefinition * root=m2->lineDefns, *r2; 227 | while (root != NULL) { 228 | root->currentpoint+=m1->length; 229 | r2=root->next; 230 | root->next=memoryContainer->lineDefns; 231 | memoryContainer->lineDefns=root; 232 | root=r2; 233 | } 234 | // Free up the m1 and m2 memory 235 | free(m1->data); 236 | free(m1); 237 | free(m2->data); 238 | free(m2); 239 | return 
memoryContainer; 240 | } 241 | 242 | struct memorycontainer* cloneMemory(struct memorycontainer* m1) { 243 | struct memorycontainer* memoryContainer = (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 244 | memoryContainer->length=m1->length; 245 | memoryContainer->data=malloc(memoryContainer->length); 246 | memoryContainer->lineDefns=m1->lineDefns; 247 | if (m1->data != NULL && m1->length > 0) memcpy(memoryContainer->data, m1->data, m1->length); 248 | return memoryContainer; 249 | } 250 | 251 | /** 252 | * Appends a statement to some memory and returns the new current location (for next entry) 253 | */ 254 | unsigned int appendStatement(struct memorycontainer* memory, unsigned char command, unsigned int position) { 255 | memcpy(&memory->data[position], &command, sizeof(unsigned char)); 256 | position+=sizeof(unsigned char); 257 | return position; 258 | } 259 | 260 | /** 261 | * Appends a variable to some memory and returns the new current location (for next entry) 262 | */ 263 | unsigned int appendVariable(struct memorycontainer* memory, unsigned short variableid, unsigned int position) { 264 | memcpy(&memory->data[position], &variableid, sizeof(short)); 265 | position+=sizeof(short); 266 | return position; 267 | } 268 | 269 | /** 270 | * Appends some memory to some other existing memory at a specific location 271 | */ 272 | unsigned int appendMemory(struct memorycontainer* memory, struct memorycontainer* statement, unsigned int position) { 273 | memcpy(&memory->data[position], statement->data, statement->length); 274 | 275 | struct lineDefinition * root=statement->lineDefns, *r2; 276 | while (root != NULL) { 277 | root->currentpoint+=position; 278 | r2=root->next; 279 | root->next=memory->lineDefns; 280 | memory->lineDefns=root; 281 | root=r2; 282 | } 283 | position+=statement->length; 284 | 285 | // Free up the statement memory 286 | free(statement->data); 287 | free(statement); 288 | return position; 289 | } 290 | 291 | /** 292 | * Gets the 
length of the assembled memory 293 | */ 294 | unsigned int getMemoryFilledSize() { 295 | if (assembledMemory == NULL) return 0; 296 | return assembledMemory->length; 297 | } 298 | 299 | /** 300 | * Sets the length of the assembled memory (when loading from bytecode file) 301 | */ 302 | void setMemoryFilledSize(unsigned int size) { 303 | if (assembledMemory == NULL) assembledMemory= (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 304 | assembledMemory->length=size; 305 | } 306 | 307 | /** 308 | * Gets the bytecode in the assembled memory 309 | */ 310 | char * getAssembledCode() { 311 | if (assembledMemory == NULL) return NULL; 312 | return assembledMemory->data; 313 | } 314 | 315 | /** 316 | * Sets the code in the assembled memory (when loading from bytecode file) 317 | */ 318 | void setAssembledCode(char * a) { 319 | if (assembledMemory == NULL) assembledMemory= (struct memorycontainer*) malloc(sizeof(struct memorycontainer)); 320 | assembledMemory->data=a; 321 | } 322 | --------------------------------------------------------------------------------