├── A_Preliminary_Report_On_evML.pdf
├── README.md
└── analysis.py

/A_Preliminary_Report_On_evML.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exo-explore/evML/6562a9efb0f18b2ee2c48f504f8d6c12cfca7a23/A_Preliminary_Report_On_evML.pdf
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# evML
evML is a protocol for secure distributed compute networks with negligible entry costs. It requires nodes to prove, using their secure enclave, that they're computing results within a trustworthy environment. They can't cheat unless the node operator performs costly hardware tampering. evML then uses spot-checks to catch nodes submitting false outputs. When a node is caught, its enclave's unique identifier is blacklisted, making the cost of the attack a sunk investment. Therefore, honesty is the only rational behavior. The worst-case analysis of node behavior in the preliminary report shows that honesty is optimal with a 5% computational overhead, assuming a hardware attack costs over \$2000. We conclude that cheating is irrational within evML, though further empirical validation is warranted.
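
A rough intuition for the incentive argument is the back-of-envelope comparison below. This is a deliberately simplified sketch, not the Markov Decision Process analysed in the report: the spot-check probability mirrors `p` in `analysis.py`, the attack cost is the \$2000 figure above, and the per-task cheating gain is a made-up placeholder.

```python
# Back-of-envelope sketch of the sunk-cost argument (illustrative values only;
# the full analysis is the Markov Decision Process in analysis.py).
def cheating_pays(attack_cost, cheat_gain_per_task, spot_check_prob):
    # With independent spot-checks, a cheater survives on the order of
    # 1 / spot_check_prob tasks before detection, after which the blacklisted
    # enclave makes the attack cost unrecoverable.
    expected_tasks_before_caught = 1.0 / spot_check_prob
    expected_cheat_profit = cheat_gain_per_task * expected_tasks_before_caught
    return expected_cheat_profit > attack_cost

print(cheating_pays(attack_cost=2000, cheat_gain_per_task=5, spot_check_prob=0.05))
# -> False: roughly $100 of expected cheating profit never recovers the $2000 attack.
```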

## Preliminary report
A preliminary report discussing the approach is provided. This work was led by Arbion Halili.

## Analysis code
We provide analysis code in Python for the Markov Decision Process discussed in the preliminary report. To get started:

```bash
pip install pymdptoolbox
python analysis.py
```
You can modify the values in the file yourself to model other scenarios.
--------------------------------------------------------------------------------

/analysis.py:
--------------------------------------------------------------------------------
import numpy as np
import mdptoolbox

# Defining Parameters
p = 0.05        # Probability of triggering the challenge mechanism
q_d = 1         # Probability of a true positive (cheating is detected)
q_h = 0         # Probability of a false positive (honest work is flagged)
R = 0.5         # Reward for completing the computation
C = 0.45        # Cost of completing the computation
C_1 = 0.45      # Cost of just decrypting the data
discount = 0.96 # Discount factor
K = 1000        # Cost of breaking the TEE (Trusted Execution Environment)
S = 100         # Cost of replacing the device
W = 1           # Reward of knowing the private data
U = 1           # Reward of altering the data

# States are enumerated as follows:
# Type A: 0
# Type B1: 1
# Type B2: 2
# Restart: 3

# Defining the Transition Model
# Dimensions: (number of actions, number of states, number of next states)
transition_model = np.zeros((3, 4, 4))

# Action 1 (a_A)
transition_model[0, :, :] = np.array([
    [1 - p*q_h, 0, 0, p*q_h],  # From state 0: stay in 0, or Restart on a false positive
    [0, 1, 0, 0],              # From state 1: stay in 1
    [0, 0, 1, 0],              # From state 2: stay in 2
    [1 - p*q_h, 0, 0, p*q_h]   # From state 3: restart into state 0, or Restart again
])

# Action 2 (a_B1)
transition_model[1, :, :] = np.array([
    [0, 1 - p*q_h, 0, p*q_h],  # From state 0: move to 1, or Restart on a false positive
    [0, 1 - p*q_h, 0, p*q_h],  # From state 1: stay in 1, or Restart on a false positive
    [0, 1 - p*q_h, 0, p*q_h],  # From state 2: move to 1, or Restart on a false positive
    [0, 0, 0, 1]               # From state 3: stay in Restart
])

# Action 3 (a_B2)
transition_model[2, :, :] = np.array([
    [0, 0, 1 - p*q_d, p*q_d],  # From state 0 to state 2 or Restart
    [0, 0, 1 - p*q_d, p*q_d],  # From state 1 to state 2 or Restart
    [0, 0, 1 - p*q_d, p*q_d],  # From state 2 to state 2 or Restart
    [0, 0, 0, 1]               # From state 3: stay in Restart
])

# Check that all transition probabilities sum to 1
for a in range(transition_model.shape[0]):
    for s in range(transition_model.shape[1]):
        row_sum = np.sum(transition_model[a, s, :])
        if not np.isclose(row_sum, 1.0):
            print(f"Warning: transition probabilities for action {a}, state {s} sum to {row_sum:.3f} (should be 1.0).")


# Defining the Reward Model
# Dimensions: (number of actions, number of states, number of next states)
reward_model = np.zeros((3, 4, 4))

# Action 1 (a_A): honest computation earns R - C; a restart still incurs the cost C
reward_model[0, :, :] = np.array([
    [R - C, 0, 0, -C],
    [0, 0, 0, 0],      # No reward for transitioning from state 1
    [0, 0, 0, 0],      # No reward for transitioning from state 2
    [-S, 0, 0, -S]     # Cost of replacing the device when restarting from state 3
])

# Action 2 (a_B1): breaking the TEE when leaving state 0 costs K; thereafter the
# node earns R - C plus W for learning the private data
reward_model[1, :, :] = np.array([
    [0, -K + R - C + W, 0, -K - C + W],
    [0, R - C + W, 0, -C + W],
    [0, R - C + W, 0, -C + W],
    [0, 0, 0, 0]
])

# Action 3 (a_B2): breaking the TEE when leaving state 0 costs K; thereafter the
# node pays only the decryption cost C_1 and gains W plus U for altering the data
reward_model[2, :, :] = np.array([
    [0, 0, -K + R - C_1 + W + U, -K - C_1 + W],
    [0, 0, R - C_1 + W + U, -C_1 + W],
    [0, 0, R - C_1 + W + U, -C_1 + W],
    [0, 0, 0, 0]
])

# Start policy iteration from the all-zero policy (action a_A in every state)
initial_policy = np.zeros(4, dtype=int)

pi = mdptoolbox.mdp.PolicyIteration(transition_model, reward_model, discount, policy0=initial_policy, max_iter=1000000)
pi.run()

# Outputting the optimal policy and value function
print("Optimal policy:", pi.policy)  # Optimal action for each state
print("Value function:", pi.V)       # Value for each state
--------------------------------------------------------------------------------
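
The README notes that the values in `analysis.py` can be modified to model other scenarios. As a hedged usage sketch (not part of the repository: the parameter values mirror `analysis.py`, while the helper `build_models` and the list of `K` values tried are arbitrary choices), one way to explore this is to rebuild the MDP for several values of the TEE-breaking cost `K` and compare the resulting optimal policies:

```python
import numpy as np
import mdptoolbox

# Parameter values mirror analysis.py; only K (the cost of breaking the TEE) is varied.
p, q_d, q_h = 0.05, 1, 0
R, C, C_1 = 0.5, 0.45, 0.45
S, W, U = 100, 1, 1
discount = 0.96

def build_models(K):
    """Rebuild the transition and reward arrays from analysis.py for a given K."""
    T = np.zeros((3, 4, 4))
    T[0] = [[1 - p*q_h, 0, 0, p*q_h],
            [0, 1, 0, 0],
            [0, 0, 1, 0],
            [1 - p*q_h, 0, 0, p*q_h]]
    # For actions a_B1 and a_B2 the rows for states 0-2 are identical.
    T[1] = [[0, 1 - p*q_h, 0, p*q_h]] * 3 + [[0, 0, 0, 1]]
    T[2] = [[0, 0, 1 - p*q_d, p*q_d]] * 3 + [[0, 0, 0, 1]]
    Rw = np.zeros((3, 4, 4))
    Rw[0] = [[R - C, 0, 0, -C],
             [0, 0, 0, 0],
             [0, 0, 0, 0],
             [-S, 0, 0, -S]]
    Rw[1] = [[0, -K + R - C + W, 0, -K - C + W],
             [0, R - C + W, 0, -C + W],
             [0, R - C + W, 0, -C + W],
             [0, 0, 0, 0]]
    Rw[2] = [[0, 0, -K + R - C_1 + W + U, -K - C_1 + W],
             [0, 0, R - C_1 + W + U, -C_1 + W],
             [0, 0, R - C_1 + W + U, -C_1 + W],
             [0, 0, 0, 0]]
    return T, Rw

# Sweep the TEE-breaking cost and report the optimal action per state.
for K in [0, 10, 50, 100, 500, 1000]:
    T, Rw = build_models(K)
    pi = mdptoolbox.mdp.PolicyIteration(T, Rw, discount)
    pi.run()
    print(f"K = {K:4d}: optimal policy = {pi.policy}")
```

Action index 0 corresponds to a_A, so a policy of all zeros means the honest action is optimal in every state for that value of `K`.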