48 | *
49 | * The goal is to fill in the missing numbers so that every row, column and box contains each of the
50 | * numbers 1-9. Here is the solution to the problem above:
51 | *
52 | *
67 | *
68 | * Note that the first row 187492563 contains each number exactly once, as does the
69 | * first column 159426873, the upper-left box 187534962, and every other
70 | * row, column and box.
71 | *
72 | *
73 | * The {@link #main(String[])} method encodes a problem as an array of strings, with one string
74 | * encoding each constraint in the problem in row-column-value format. Here is the problem again
75 | * with the indices indicated:
76 | *
77 | *
93 | *
94 | * The 8 in the upper left box of the puzzle is encoded as 018 (
95 | * 0 for the row, 1 for the column, and 8 for the value). The
96 | * 4 in the lower right box is encoded as 874.
97 | *
98 | *
99 | * The full command-line invocation for the above puzzle is:
100 | *
101 | *
121 | * See Wikipedia: Sudoku for more information on
122 | * Sudoku.
123 | *
124 | *
125 | * The algorithm employed is similar to the standard backtracking
126 | * eight queens algorithm.
127 | *
128 | * @version 1.0
129 | * @author Bob Carpenter
130 | */
131 | public class Sudoku extends Remoteable {
132 |
133 | private static final long serialVersionUID = -3962977915411306215L;
134 |
135 | private transient DFE dfe;
136 |
137 | private int[][] matrix;
138 |
139 | private String[] input =
140 | {"006", "073", "102", "131", "149", "217", "235", "303", "345", "361", "378", "422", "465",
141 | "514", "521", "548", "582", "658", "679", "743", "752", "784", "818", "883"};
142 |
143 | public Sudoku(DFE dfe) {
144 | this.dfe = dfe;
145 | matrix = parseProblem(input);
146 | }
147 |
148 | @Remote
149 | public boolean localhasSolution() {
150 | return solve(0, 0, matrix);
151 | }
152 |
153 | public boolean solve(int i, int j, int[][] cells) {
154 | if (i == 9) {
155 | i = 0;
156 | if (++j == 9)
157 | return true;
158 | }
159 | if (cells[i][j] != 0)
160 | return solve(i + 1, j, cells);
161 | for (int val = 1; val <= 9; ++val) {
162 | if (legal(i, j, val, cells)) {
163 | cells[i][j] = val;
164 | if (solve(i + 1, j, cells))
165 | return true;
166 | }
167 | }
168 | cells[i][j] = 0;
169 | return false;
170 | }
171 |
172 | private boolean legal(int i, int j, int val, int[][] cells) {
173 | for (int k = 0; k < 9; ++k)
174 | if (val == cells[k][j])
175 | return false;
176 | for (int k = 0; k < 9; ++k)
177 | if (val == cells[i][k])
178 | return false;
179 | int boxRowOffset = (i / 3) * 3;
180 | int boxColOffset = (j / 3) * 3;
181 | for (int k = 0; k < 3; ++k)
182 | for (int m = 0; m < 3; ++m)
183 | if (val == cells[boxRowOffset + k][boxColOffset + m])
184 | return false;
185 | return true;
186 | }
187 |
188 | static int[][] parseProblem(String[] input) {
189 | int[][] problem = new int[9][9];
190 | for (int n = 0; n < input.length; ++n) {
191 | int i = Integer.parseInt(input[n].substring(0, 1));
192 | int j = Integer.parseInt(input[n].substring(1, 2));
193 | int val = Integer.parseInt(input[n].substring(2, 3));
194 | problem[i][j] = val;
195 | }
196 | return problem;
197 | }
198 |
199 | @Override
200 | public void copyState(Remoteable state) {}
201 |
202 | public boolean hasSolution() {
203 | Method toExecute;
204 | Class>[] paramTypes = null;
205 | Object[] paramValues = null;
206 | boolean result = false;
207 | try {
208 | toExecute = this.getClass().getDeclaredMethod("localhasSolution", paramTypes);
209 | result = (Boolean) dfe.execute(toExecute, paramValues, this);
210 | } catch (SecurityException e) {
211 | // Should never get here
212 | e.printStackTrace();
213 | throw e;
214 | } catch (NoSuchMethodException e) {
215 | // Should never get here
216 | e.printStackTrace();
217 | } catch (Throwable e) {
218 | // TODO Auto-generated catch block
219 | e.printStackTrace();
220 | }
221 | return result;
222 | }
223 | }
224 |
--------------------------------------------------------------------------------
/rapidDemoApp/src/main/java/eu/project/rapid/queens/NQueens.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (C) 2015, 2016 RAPID EU Project
3 | *
4 | * This library is free software; you can redistribute it and/or modify it under the terms of the
5 | * GNU Lesser General Public License as published by the Free Software Foundation; either version
6 | * 2.1 of the License, or (at your option) any later version.
7 | *
8 | * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
9 | * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | * Lesser General Public License for more details.
11 | *
12 | * You should have received a copy of the GNU Lesser General Public License along with this library;
13 | * if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
14 | * 02110-1301 USA
15 | *******************************************************************************/
16 | package eu.project.rapid.queens;
17 |
18 | import android.util.Log;
19 |
20 | import java.lang.reflect.Method;
21 |
22 | import eu.project.rapid.ac.DFE;
23 | import eu.project.rapid.ac.Remote;
24 | import eu.project.rapid.ac.Remoteable;
25 | import eu.project.rapid.ac.utils.Utils;
26 |
27 | public class NQueens extends Remoteable {
28 |
29 | private static final long serialVersionUID = 5687713591581731140L;
30 | private static final String TAG = "NQueens";
31 | private int N = 8;
32 | private int nrVMs;
33 | public boolean enforceForwarding = false;
34 | private transient DFE dfe;
35 |
36 | /**
37 | * @param dfe The dfe taking care of the execution.
38 | * @param nrVMs In case of remote execution specify the number of VMs needed.
39 | */
40 | public NQueens(DFE dfe, int nrVMs) {
41 | this.dfe = dfe;
42 | this.nrVMs = nrVMs;
43 | }
44 |
45 | /**
46 | * @param dfe The execution dfe taking care of the execution
47 | */
48 | public NQueens(DFE dfe) {
49 | this(dfe, 1);
50 | }
51 |
52 | @Override
53 | public void prepareDataOnClient() {
54 |
55 | }
56 |
57 | /**
58 | * Solve the N-queens problem
59 | *
60 | * @param N The number of queens
61 | * @return The number of solutions found
62 | */
63 | public int solveNQueens(int N) {
64 | this.N = N;
65 | Method toExecute;
66 | Class>[] paramTypes = {int.class};
67 | Object[] paramValues = {N};
68 |
69 | int result = 0;
70 | try {
71 | toExecute = this.getClass().getDeclaredMethod("localSolveNQueens", paramTypes);
72 | result = (Integer) dfe.execute(toExecute, paramValues, this);
73 | } catch (SecurityException e) {
74 | // Should never get here
75 | e.printStackTrace();
76 | throw e;
77 | } catch (NoSuchMethodException e) {
78 | // Should never get here
79 | e.printStackTrace();
80 | } catch (Throwable e) {
81 | // TODO Auto-generated catch block
82 | e.printStackTrace();
83 | }
84 | return result;
85 | }
86 |
87 | @Remote
88 | public int localSolveNQueens(int N) {
89 |
90 | int countSolutions = 0;
91 |
92 | byte[][] board = new byte[N][N];
93 |
94 | int start = 0, end = N;
95 |
96 | if (Utils.isOffloaded()) {
97 | Log.i(TAG, "This is now running on the VM...");
98 |
99 | // FIXME Here we use this flag to enforce a forwarding
100 | if (enforceForwarding) {
101 | // set to false, so the other VM will not keep forwarding.
102 | enforceForwarding = false;
103 | throw new OutOfMemoryError("Enforcing offload forwarding");
104 | }
105 |
106 | // cloneId == 0 if this is the main clone
107 | // or [1, nrVMs-1] otherwise
108 | int cloneId = Utils.readCloneHelperId();
109 | int howManyCols = (N) / nrVMs; // Integer division, we may
110 | // loose some columns.
111 | start = cloneId * howManyCols; // cloneId == 0 if this is the main clone
112 | end = start + howManyCols;
113 |
114 | // If this is the clone with the highest id let him take care
115 | // of the columns not considered due to the integer division.
116 | if (cloneId == nrVMs - 1) {
117 | end += N % nrVMs;
118 | }
119 | }
120 |
121 | Log.i(TAG, "Finding solutions for " + N + "-queens puzzle.");
122 | Log.i(TAG, "Analyzing columns: " + start + "-" + (end - 1));
123 |
124 | for (int i = start; i < end; i++) {
125 | for (int j = 0; j < N; j++) {
126 | for (int k = 0; k < N; k++) {
127 | for (int l = 0; l < N; l++) {
128 | if (N == 4) {
129 | countSolutions += setAndCheckBoard(board, i, j, k, l);
130 | continue;
131 | }
132 | for (int m = 0; m < N; m++) {
133 | if (N == 5) {
134 | countSolutions += setAndCheckBoard(board, i, j, k, l, m);
135 | continue;
136 | }
137 | for (int n = 0; n < N; n++) {
138 | if (N == 6) {
139 | countSolutions += setAndCheckBoard(board, i, j, k, l, m, n);
140 | continue;
141 | }
142 | for (int o = 0; o < N; o++) {
143 | if (N == 7) {
144 | countSolutions += setAndCheckBoard(board, i, j, k, l, m, n, o);
145 | continue;
146 | }
147 | for (int p = 0; p < N; p++) {
148 | countSolutions += setAndCheckBoard(board, i, j, k, l, m, n, o, p);
149 | }
150 | }
151 | }
152 | }
153 | }
154 | }
155 | }
156 | }
157 |
158 | Log.i(TAG, "Found " + countSolutions + " solutions.");
159 |
160 | return countSolutions;
161 | }
162 |
163 | /**
164 | * When having more than one clone running the method there will be partial results which should
165 | * be combined to get the total result. This will be done automatically by the main clone by
166 | * calling this method.
167 | *
168 | * @param params Array of partial results.
169 | * @return The total result.
170 | */
171 | public int localSolveNQueensReduce(int[] params) {
172 | int solutions = 0;
173 | for (int param : params) {
174 | Log.i(TAG, "Adding " + param + " partial solutions.");
175 | solutions += param;
176 | }
177 | return solutions;
178 | }
179 |
180 | private int setAndCheckBoard(byte[][] board, int... cols) {
181 |
182 | clearBoard(board);
183 |
184 | for (int i = 0; i < N; i++)
185 | board[i][cols[i]] = 1;
186 |
187 | if (isSolution(board))
188 | return 1;
189 |
190 | return 0;
191 | }
192 |
193 | private void clearBoard(byte[][] board) {
194 | for (int i = 0; i < N; i++) {
195 | for (int j = 0; j < N; j++) {
196 | board[i][j] = 0;
197 | }
198 | }
199 | }
200 |
201 | private boolean isSolution(byte[][] board) {
202 |
203 | int rowSum = 0;
204 | int colSum = 0;
205 |
206 | for (int i = 0; i < N; i++) {
207 | for (int j = 0; j < N; j++) {
208 | rowSum += board[i][j];
209 | colSum += board[j][i];
210 |
211 | if (i == 0 || j == 0)
212 | if (!checkDiagonal1(board, i, j))
213 | return false;
214 |
215 | if (i == 0 || j == N - 1)
216 | if (!checkDiagonal2(board, i, j))
217 | return false;
218 |
219 | }
220 | if (rowSum > 1 || colSum > 1)
221 | return false;
222 | rowSum = 0;
223 | colSum = 0;
224 | }
225 |
226 | return true;
227 | }
228 |
229 | private boolean checkDiagonal1(byte[][] board, int row, int col) {
230 | int sum = 0;
231 | int i = row;
232 | int j = col;
233 | while (i < N && j < N) {
234 | sum += board[i][j];
235 | i++;
236 | j++;
237 | }
238 | return sum <= 1;
239 | }
240 |
241 | private boolean checkDiagonal2(byte[][] board, int row, int col) {
242 | int sum = 0;
243 | int i = row;
244 | int j = col;
245 | while (i < N && j >= 0) {
246 | sum += board[i][j];
247 | i++;
248 | j--;
249 | }
250 | return sum <= 1;
251 | }
252 |
253 | private void printBoard(byte[][] board) {
254 | for (int i = 0; i < N; i++) {
255 | StringBuilder row = new StringBuilder();
256 | for (int j = 0; j < N; j++) {
257 | row.append(board[i][j]);
258 | if (j < N - 1)
259 | row.append(" ");
260 | }
261 | Log.i(TAG, row.toString());
262 | }
263 | Log.i(TAG, "\n");
264 | }
265 |
266 | public void setNumberOfClones(int nrClones) {
267 | this.nrVMs = nrClones;
268 | }
269 |
270 | @Override
271 | public void copyState(Remoteable state) {
272 |
273 | }
274 |
275 | public boolean isEnforceForwarding() {
276 | return enforceForwarding;
277 | }
278 |
279 | public void setEnforceForwarding(boolean enforceForwarding) {
280 | this.enforceForwarding = enforceForwarding;
281 | }
282 | }
283 |
284 |
--------------------------------------------------------------------------------
/rapidDemoApp/src/main/java/eu/project/rapid/gvirtus/MatrixMul.java:
--------------------------------------------------------------------------------
1 | package eu.project.rapid.gvirtus;
2 |
3 | import android.util.Log;
4 |
5 | import java.io.IOException;
6 | import java.lang.reflect.Method;
7 |
8 | import eu.project.rapid.ac.DFE;
9 | import eu.project.rapid.ac.Remote;
10 | import eu.project.rapid.ac.Remoteable;
11 | import eu.project.rapid.gvirtus4a.CudaDrFrontend;
12 | import eu.project.rapid.gvirtus4a.CudaException;
13 | import eu.project.rapid.gvirtus4a.Provider;
14 | import eu.project.rapid.gvirtus4a.Providers;
15 | import eu.project.rapid.gvirtus4a.Util;
16 | import eu.project.rapid.gvirtus4a.params.FloatArrayParam;
17 | import eu.project.rapid.gvirtus4a.params.IntParam;
18 | import eu.project.rapid.gvirtus4a.params.StringParam;
19 |
20 | /**
21 | * Created by raffaelemontella on 26/04/2017.
22 | */
23 |
24 | public class MatrixMul extends Remoteable {
25 | public static final String LOG_TAG="MATRIXMUL";
26 | private transient DFE dfe;
27 |
28 | private int widthA;
29 | private int heightA;
30 | private int widthB;
31 |
32 | private String ptxSource;
33 |
34 | public static void registerProviders() {
35 | Providers providers=Providers.getInstance();
36 | providers.unregister();
37 | // OTC
38 | providers.register("80.158.23.133", 9998);
39 | // Amazon Web Services
40 | providers.register("54.72.110.23", 9998);
41 | // Silo
42 | providers.register( "83.235.169.221",9998);
43 | // UNP RAPID GPU AS
44 | providers.register("193.205.230.23", 9998);
45 |
46 | // Ask for the best provider and set it as default
47 | //providers.setDefaultProvider(providers.getBest());
48 | }
49 |
50 | public MatrixMul(DFE dfe) {
51 | this.dfe = dfe;
52 | String ptxName = "cuda-kernels/matrixMul_kernel64.ptx";
53 | try {
54 | ptxSource = Util.readAssetFileAsString(dfe.getContext(), ptxName);
55 | Log.v(LOG_TAG, "Read the PTX source");
56 | Log.v(LOG_TAG, ptxSource);
57 | } catch (IOException ex) {
58 | throw new RuntimeException(ex);
59 | }
60 | }
61 |
62 | @Override
63 | public void prepareDataOnClient() {
64 |
65 | }
66 |
67 | @Override
68 | public void copyState(Remoteable state) {
69 |
70 | }
71 |
72 | public boolean gpuMatrixMul(int widthA, int heightA, int widthB) {
73 | this.widthA = widthA;
74 | this.heightA = heightA;
75 | this.widthB = widthB;
76 | Method toExecute;
77 | boolean result = false;
78 | Class>[] paramTypes = {int.class, int.class, int.class};
79 | Object[] paramValues = {widthA, heightA, widthB};
80 |
81 | try {
82 | toExecute = this.getClass().getDeclaredMethod("localGpuMatrixMul", paramTypes);
83 | result = (Boolean) dfe.execute(toExecute, paramValues, this);
84 | } catch (SecurityException e) {
85 | // Should never get here
86 | e.printStackTrace();
87 | throw e;
88 | } catch (NoSuchMethodException e) {
89 | // Should never get here
90 | e.printStackTrace();
91 | } catch (Throwable e) {
92 | // TODO Auto-generated catch block
93 | e.printStackTrace();
94 | }
95 |
96 | return result;
97 | }
98 |
99 | @Remote
100 | public boolean localGpuMatrixMul(int widthA, int heightA, int widthB) {
101 | Log.v(LOG_TAG, "Entered matrixMul");
102 |
103 | final float valB = 0.01f;
104 | int exit_c=0;
105 | //CudaDrFrontend driver = new CudaDrFrontend("193.205.230.23", 9998);
106 | Provider bestProvider=Providers.getInstance().getDefaultProvider();
107 | if (bestProvider!=null) {
108 | Log.d(LOG_TAG,"Best provider:"+bestProvider.getHost()+":"+bestProvider.getPort());
109 | try {
110 | CudaDrFrontend driver = new CudaDrFrontend(bestProvider.getHost(), bestProvider.getPort());
111 |
112 |
113 | exit_c = driver.cuInit(0);
114 | if (exit_c != 0) {
115 | throw new RuntimeException(new CudaException(exit_c));
116 | }
117 | StringParam spCuContext = new StringParam();
118 |
119 | exit_c = driver.cuCtxCreate(0, 0, spCuContext);
120 | if (exit_c != 0) {
121 | throw new RuntimeException(new CudaException(exit_c));
122 | }
123 | String cuContext = spCuContext.value;
124 |
125 | IntParam ipDevice = new IntParam();
126 | exit_c = driver.cuDeviceGet(0, ipDevice);
127 | if (exit_c != 0) {
128 | throw new RuntimeException(new CudaException(exit_c));
129 | }
130 |
131 | Log.v(LOG_TAG, "matrixMul 1");
132 |
133 | int jitNumOptions = 3;
134 | int[] jitOptions = new int[jitNumOptions];
135 |
136 | // set up size of compilation log buffer
137 | jitOptions[0] = 4;// CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
138 | long jitLogBufferSize = 1024;
139 | long jitOptVals0 = jitLogBufferSize;
140 |
141 | // set up pointer to the compilation log buffer
142 | jitOptions[1] = 3;// CU_JIT_INFO_LOG_BUFFER;
143 |
144 | char[] jitLogBuffer = new char[(int) jitLogBufferSize];
145 | char[] jitOptVals1 = jitLogBuffer;
146 |
147 | Log.v(LOG_TAG, "matrixMul 2");
148 |
149 | // set up pointer to set the Maximum # of registers for a particular
150 | // kernel
151 | jitOptions[2] = 0;// CU_JIT_MAX_REGISTERS;
152 | long jitRegCount = 32;
153 | long jitOptVals2 = jitRegCount;
154 |
155 | Log.v(LOG_TAG, "matrixMul 3");
156 |
157 | StringParam spModule = new StringParam();
158 | exit_c = driver.cuModuleLoadDataEx(
159 | ptxSource, jitNumOptions, jitOptions, jitOptVals0,
160 | jitOptVals1, jitOptVals2, spModule);
161 | if (exit_c != 0) {
162 | throw new RuntimeException(new CudaException(exit_c));
163 | }
164 | String cmodule = spModule.value;
165 | Log.v(LOG_TAG, "matrixMul 4");
166 |
167 | StringParam spFunction = new StringParam();
168 | exit_c = driver.cuModuleGetFunction(cmodule, "matrixMul_bs32_32bit", spFunction);
169 | if (exit_c != 0) {
170 | throw new RuntimeException(new CudaException(exit_c));
171 | }
172 | String cfunction = spFunction.value;
173 | Log.v(LOG_TAG, "matrixMul 5");
174 |
175 | // allocate host memory for matrices A and B
176 | int block_size = 32; // larger block size is for Fermi and above
177 | final int WA = (widthA * block_size); // Matrix A width
178 | final int HA = (heightA * block_size); // Matrix A height
179 | final int WB = (widthB * block_size); // Matrix B width
180 | final int HB = WA; // Matrix B height
181 | int WC = WB; // Matrix C width
182 | int HC = HA; // Matrix C height
183 |
184 | int size_A = WA * HA;
185 | int mem_size_A = Float.SIZE / 8 * size_A;
186 | float[] h_A = new float[size_A];
187 | int size_B = WB * HB;
188 | int mem_size_B = Float.SIZE / 8 * size_B;
189 | float[] h_B = new float[size_B];
190 | //System.out.prinf("%.2f", valB);
191 |
192 | Log.v(LOG_TAG, "matrixMul 6");
193 |
194 | h_A = constantInit(h_A, size_A, 1.0f);
195 | h_B = constantInit(h_B, size_B, valB);
196 | // allocate device memory
197 |
198 | StringParam spD_A = new StringParam();
199 | exit_c = driver.cuMemAlloc(mem_size_A, spD_A);
200 | if (exit_c != 0) {
201 | throw new RuntimeException(new CudaException(exit_c));
202 | }
203 | String d_A = spD_A.value;
204 |
205 | StringParam spD_B = new StringParam();
206 | exit_c = driver.cuMemAlloc(mem_size_B, spD_B);
207 | if (exit_c != 0) {
208 | throw new RuntimeException(new CudaException(exit_c));
209 | }
210 | String d_B = spD_B.value;
211 |
212 | driver.cuMemcpyHtoD(d_A, h_A, mem_size_A);
213 | driver.cuMemcpyHtoD(d_B, h_B, mem_size_B);
214 | // allocate device memory for result
215 | long size_C = WC * HC;
216 | float[] h_C;
217 |
218 | Log.v(LOG_TAG, "matrixMul 7");
219 |
220 | long mem_size_C = Float.SIZE / 8 * size_C;
221 |
222 | StringParam spD_C = new StringParam();
223 | exit_c = driver.cuMemAlloc(mem_size_C, spD_C);
224 | if (exit_c != 0) {
225 | throw new RuntimeException(new CudaException(exit_c));
226 | }
227 | String d_C = spD_C.value;
228 |
229 | Util.Dim3 grid = new Util.Dim3(WC / block_size, HC / block_size, 1);
230 |
231 | int offset = 0;
232 | // setup execution parameters
233 |
234 | Log.v(LOG_TAG, "matrixMul 8");
235 |
236 | driver.cuParamSetv(cfunction, offset, d_C, Util.Sizeof.LONG);
237 |
238 | offset += Util.Sizeof.LONG;
239 | driver.cuParamSetv(cfunction, offset, d_A, Util.Sizeof.LONG);
240 | offset += Util.Sizeof.LONG;
241 | driver.cuParamSetv(cfunction, offset, d_B, Util.Sizeof.LONG);
242 | offset += Util.Sizeof.LONG;
243 |
244 | Log.v(LOG_TAG, "matrixMul 9");
245 |
246 | int Matrix_Width_A = WA;
247 | int Matrix_Width_B = WB;
248 | int Sizeof_Matrix_Width_A = Util.Sizeof.INT;
249 | int Sizeof_Matrix_Width_B = Util.Sizeof.INT;
250 |
251 |
252 | driver.cuParamSeti(cfunction, offset, Matrix_Width_A);
253 |
254 | Log.v(LOG_TAG, "matrixMul 10");
255 |
256 | offset += Sizeof_Matrix_Width_A;
257 | driver.cuParamSeti(cfunction, offset, Matrix_Width_B);
258 | offset += Sizeof_Matrix_Width_B;
259 |
260 | Log.v(LOG_TAG, "matrixMul 11");
261 |
262 | driver.cuParamSetSize(cfunction, offset);
263 | driver.cuFuncSetBlockShape(cfunction, block_size, block_size, grid.z);
264 | driver.cuFuncSetSharedSize(cfunction, 2 * block_size * block_size * (Float.SIZE / 8));
265 | driver.cuLaunchGrid(cfunction, grid.x, grid.y);
266 |
267 | FloatArrayParam fapH_C = new FloatArrayParam();
268 | exit_c = driver.cuMemcpyDtoH(d_C, mem_size_C, fapH_C);
269 | if (exit_c != 0) {
270 | throw new RuntimeException(new CudaException(exit_c));
271 | }
272 | h_C = fapH_C.values;
273 |
274 | Log.v(LOG_TAG, "matrixMul 12");
275 |
276 | boolean correct = true;
277 | for (int i = 0; i < WC * HC; i++) {
278 | if (Math.abs(h_C[i] - (WA * valB)) > 1e-2) {
279 | correct = false;
280 | }
281 | }
282 |
283 | Log.v(LOG_TAG, "matrixMul 13");
284 |
285 | driver.cuMemFree(d_A);
286 | driver.cuMemFree(d_B);
287 | driver.cuMemFree(d_C);
288 | driver.cuCtxDestroy(cuContext);
289 |
290 | driver.close();
291 | Log.v(LOG_TAG, "matrixMul 14");
292 | return correct;
293 |
294 | } catch (IOException ex) {
295 | Log.e(LOG_TAG, "Error while running MatrixMul: " + ex);
296 | throw new RuntimeException(ex);
297 | }
298 | }
299 | return false;
300 | }
301 |
302 | public static float[][] makeMatrix(int dim1, int dim2, float valB) {
303 | float[][] matrix = new float[dim1][dim2];
304 | for (int i = 0; i < matrix.length; i++)
305 | for (int j = 0; j < matrix[i].length; j++)
306 | matrix[i][j] = valB;
307 | return matrix;
308 | }
309 |
310 | public static float[] constantInit(float[] data, int size, float val) {
311 | for (int i = 0; i < size; ++i) {
312 | data[i] = val;
313 | }
314 | return data;
315 | }
316 | }
317 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAPID - Android Demo Application
2 | This is part of the [RAPID Project](http://www.rapid-project.eu) and is ongoing work. While RAPID aims to support heterogeneous devices, this is a demonstration of task offloading on **Android**. This demo uses the [RAPID Android Offloading Framework](https://github.com/RapidProjectH2020/rapid-android).
3 | For Java and CUDA code offloading on other platforms, have a look at the generic [RAPID Linux/Windows Demo Application](https://github.com/RapidProjectH2020/rapid-linux-DemoApp).
4 |
5 | On this page we will guide you through how to:
6 | * [Quickly Install and Test the Demo Application](#installing-and-testing-the-demo).
7 | * [Start Developing Android Applications with RAPID Offloading Support](#developing-android-applications-with-rapid-offloading-support).
8 |
9 | ## Intro
10 | RAPID enables automatic computation offloading of heavy tasks on Android and Linux/Windows Java applications.
11 | Moreover, RAPID enables the possibility for embedding CUDA code in applications for generic Android devices
12 | and for Java Linux/Windows.
13 | RAPID enables highly CPU- or GPU-demanding applications to be offered through physical or virtual devices with lower capabilities or resources than the applications require, potentially backed by remote accelerators of the same class (D2D) or higher classes (hierarchical mode).
14 | RAPID supports its acceleration service through code offloading to more capable devices or devices with more resources, when this is deemed necessary or beneficial.
15 |
16 |
17 |
18 |
19 | ### Terminology
20 | * **User Device (UD):** is the low-power device (phone, e.g.) that will be accelerated by code offloading. In our scenario it will be a phone running Android (version 4.1+ is recommended).
21 | * **Acceleration Client (AC):** is an Android library that enables code offloading on the Android applications.
22 | * **Application (AP):** is the Android application that will be accelerated by the framework. This application includes the AC as a library and uses the AC's API and the RAPID programming model.
23 | * **VM:** is a Virtual Machine running on virtualized software, with the same operating system as the UD. In our scenario it will be an Android-x86 instance (version 4.0+ is recommended) running on VirtualBox.
24 | * **Acceleration Server (AS):** is an Android application that runs on the VM and is responsible for executing the code offloaded by the client.
25 |
26 | ## Installing and Testing the Demo
27 |
28 | ### Description of the Demo App
29 | The demo application shows three representative use case offloading scenarios:
30 |
31 | * **Android Java method offloading.**
32 |
33 | This is the simplest case of computation offloading, dealing with remote execution of Java methods.
34 | We have selected the [N-Queens puzzle](https://developers.google.com/optimization/puzzles/queens) as a representative for this use case.
35 | The N-Queens puzzle is the task of *arranging N chess queens on an N×N chessboard so that no two queens can attack each other*.
36 | The current implementation is a brute force algorithm.
37 | The user can vary the number of queens from 4 to 8, thereby changing the difficulty of the problem and its duration.
38 | When pressing the button `Solve Nqueens`, the computation will be performed via the RAPID AC locally on the device or remotely on the VM.
39 | Cumulative statistics in terms of number of local/remote executions and average duration of local/remote executions will be shown to the user in real time.
40 | The expected result is that, as the number of queens increases, the gap between the local and remote execution times should widen,
41 | with the remote executions being faster for larger numbers of queens.
42 |
43 | * **Android C/C++ native function offloading.**
44 |
45 | Android allows developers to include native C/C++ code in their applications for increasing the performance
46 | of intensive tasks or for allowing code reusability.
47 | A normal Java method can call a native function thanks to the Java Native Interface (JNI).
48 | To show that RAPID supports offloading of native functions, we have included in the demo a simple application
49 | that simply returns the string "*Hello from JNI*" implemented in C++ and included as a native library in the demo application.
50 | Also in this case, the user can see cumulative statistics in terms of number and duration of local/remote execution.
51 | The expected result here is that the local execution will always be faster than the remote one,
52 | given that the native function is not computationally intensive, meaning that the remote execution is penalized by the data transmission.
53 | However, this is just a simple demo serving as a starting point for building applications that include offloadable native functions.
54 |
55 | * **Android CUDA programming and Android CUDA offloading.**
56 |
57 | The third showcase is the most complex one, including CUDA code offloading.
58 | The demo application in this case is a matrix multiplication performed using CUDA.
59 | Notice that CUDA development is not possible for the majority of Android devices.
60 | As such, the developer:
61 | * Implements her CUDA code in a development machine as if it were for another supported operating system,
62 | e.g. Linux, and generates the Parallel Thread Execution (PTX) file using the NVIDIA CUDA Compiler (nvcc).
63 | * Then, the PTX file has to be embedded in the `assets/cuda-kernels` folder of the Android application,
64 | where the RAPID framework will look for loading the file during runtime.
65 |
66 | When the execution of the method containing CUDA calls is performed locally, if the client device does not have a GPU,
67 | RAPID will offload the CUDA calls from the client device to RAPID AS,
68 | which will take care of running them on the physical GPU of the machine where it is deployed (i.e. the RAPID cloud).
69 | When the execution of the method containing the CUDA calls is performed remotely, because it is offloaded by the RAPID AC,
70 | the CUDA calls will be executed by RAPID on the remote GPU.
71 |
72 |
73 |
74 |
75 |
76 | ### Installing
77 | The demo shows how portions of the application's code can be run locally on the device or offloaded to a remote VM.
78 | Installation steps:
79 | 1. Clone this project in Android Studio.
80 | 2. Install the demo application in an Android device (a phone with Android 4.1+ is recommended).
81 | 3. Install the Android-x86 VM we provide on a computer that is reachable by the phone device (i.e. the phone can ping the VM).
82 | * Install [VirtualBox](https://www.virtualbox.org/) on the computer.
83 | * Download the Android-x86-6.0 VirtualBox image of the VM from the RAPID website [here](http://rapid-project.eu/files/android-x86-6.0-r3.ova).
84 | (If you are having problems with this image then you can download the Android-x86-4.4 RAPID image from [here](http://rapid-project.eu/files/android-x86-4.4.ova))
85 | * [Import the image on VirtualBox](https://docs.oracle.com/cd/E26217_01/E26796/html/qs-import-vm.html).
86 | * Launch the Android VM you just imported.
87 | * The VM will automatically start the AS, you don't have to do anything.
88 | * Get the IP of the VM (one way to do this is to open the Terminal app inside the Android-x86 VM and type `busybox ifconfig`. If this command doesn't work then try `netcfg`.).
89 | * Make sure that the phone device can ping the VM **and** the VM can ping the phone.
90 | * If you are experiencing issues with networking, you can read more about [VirtualBox
91 | networking](https://www.virtualbox.org/manual/ch06.html).
92 | * ***Notice:** In the final release of the RAPID architecture we will provide VMs running on the RAPID cloud,
93 | meaning that you will not have to deal with these steps yourself.*
94 | 4. On the phone, select the radio button `Direct connection to VM` and write the **IP of the VM** on the text box that will open
95 | (see the first figure below).
96 | 5. Press `Start` and wait until the app connects with the AS running on the VM.
97 | * A **green text** will notify that the connection with the VM was **successful**.
98 | * A **red text** will notify that the connection with the VM was **not successful**.
99 | 6. You will be presented with an Android activity showing the three demo apps.
100 | 7. You can select the execution location of the tasks using the radio buttons:
101 | * `Always Local` will instruct the framework to always execute the tasks locally on the device (phone).
102 | * `Always Remote` will instruct the framework to always execute the tasks remotely on the VM.
103 | * `Energy and Delay` will instruct the framework to make dynamic decisions and choose the execution location (local or remote) so as to minimize the energy and execution time of each task.
104 | 8. The second figure below shows the N-Queens puzzle being executed locally on the device.
105 | 9. The third figure shows the statistics of running the N-Queens puzzle in the device and remotely on the VM.
106 | * You can see that running it remotely is almost 10 times faster.
107 |
108 |