├── .gitignore ├── Makefile ├── README.md ├── deephack.py ├── docs └── man │ └── man1 │ └── python_truss.1.ronn ├── dqnagent.py ├── models ├── postgresv-v1.model └── sqlite-v1.model ├── scripts ├── build.sh ├── postgres.sh ├── qt.py └── vulnserver.sh ├── setup.py ├── state.py ├── tests ├── __init__.py ├── test_cli.py └── test_lib.py ├── tox.ini └── vulnserver ├── __init__.py ├── cli ├── __init__.py └── vulnserver.py └── lib ├── __init__.py ├── keywords ├── models.py └── vroutes.py /.gitignore: -------------------------------------------------------------------------------- 1 | #Model file 2 | saved.model 3 | training.json 4 | TensorBoard/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make 2 | # WARN: gmake syntax 3 | ######################################################## 4 | # 5 | # useful targets: 6 | # make test - run the unit tests 7 | # make flake8 - linting and pep8 8 | # make docs - create manpages and html documentation 9 | # make loc - stats about loc 10 | 11 | ######################################################## 12 | # variable section 13 | 14 | NAME = vulnserver 15 | OS = $(shell uname -s) 16 | PYTHON = $(shell which python3) 17 | VIRTUALENV_PATH = $(shell echo $$HOME/.virtualenvs) 18 | INSTALL_PATH = /usr/local/lib 19 | EXEC_PATH = /usr/local/bin 20 | 21 | MANPAGES=$(wildcard docs/man/**/*.*.ronn) 22 | MANPAGES_GEN=$(patsubst %.ronn,%,$(MANPAGES)) 23 | MANPAGES_HTML=$(patsubst %.ronn,%.html,$(MANPAGES)) 24 | ifneq ($(shell which ronn 2>/dev/null),) 25 | RONN2MAN = ronn 26 | else 27 | 
RONN2MAN = @echo "ERROR: 'ronn' command is not installed but is required to build $(MANPAGES)" && exit 1 28 | endif 29 | 30 | UNITTESTS=unittest 31 | COVERAGE=coverage 32 | 33 | ######################################################## 34 | 35 | 36 | docs: $(MANPAGES) 37 | $(RONN2MAN) $^ 38 | 39 | .PHONY: clean 40 | clean: 41 | rm -f $(MANPAGES_GEN) $(MANPAGES_HTML) 42 | rm -rf ./build 43 | rm -rf ./dist 44 | rm -rf ./*.egg-info 45 | rm -rf ./*.deb 46 | rm -rf .tox 47 | rm -rf .coverage 48 | rm -rf .sloccount 49 | rm -rf .cache 50 | find . -name '*.pyc.*' -delete 51 | find . -name '*.pyc' -delete 52 | find . -name '__pycache__' -delete 53 | 54 | loc: 55 | mkdir -p .sloccount 56 | sloccount --datadir .sloccount $(NAME)/lib $(NAME)/cli | grep -v SLOCCount | grep -v license | grep -v "see the documentation" 57 | 58 | flake8: 59 | @echo "#############################################" 60 | @echo "# Running flake8 Compliance Tests" 61 | @echo "#############################################" 62 | -flake8 --ignore=E501,E221,W291,W391,E302,E251,E203,W293,E231,E303,E201,E225,E261,E241 $(NAME)/lib $(NAME)/cli 63 | 64 | test: 65 | py.test --cov-report term --cov=$(NAME) ./tests/* 66 | 67 | virtualenv: 68 | mkdir -p $(VIRTUALENV_PATH) 69 | rm -rf $(VIRTUALENV_PATH)/$(NAME) 70 | virtualenv -p $(PYTHON) $(VIRTUALENV_PATH)/$(NAME) 71 | 72 | virtualenv-install: virtualenv 73 | $(VIRTUALENV_PATH)/$(NAME)/bin/python setup.py install 74 | 75 | virtualenv-develop: virtualenv 76 | # hack no idea why this isn't working 77 | $(VIRTUALENV_PATH)/$(NAME)/bin/pip install numpy scipy 78 | $(VIRTUALENV_PATH)/$(NAME)/bin/python3 setup.py develop 79 | 80 | virtualenv-sdist: virtualenv 81 | $(VIRTUALENV_PATH)/$(NAME)/bin/python setup.py sdist 82 | 83 | install: 84 | cp -r $(VIRTUALENV_PATH)/$(NAME) $(INSTALL_PATH)/$(NAME) 85 | ln -f -s $(INSTALL_PATH)/$(NAME)/bin/$(NAME) $(EXEC_PATH)/$(NAME) 86 | 87 | container: 88 | bash ./scripts/build.sh -d 89 | bash ./scripts/build.sh -b 90 | 91 | all: docs 
flake8 test loc 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | docs/man/man1/python_truss.1.ronn -------------------------------------------------------------------------------- /deephack.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import dqnagent 3 | import state 4 | import numpy as np 5 | import requests 6 | import argparse 7 | import json 8 | import sys 9 | from collections import deque 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--supervise", help="Perform supervised learning with argument as file") 13 | parser.add_argument("--batchsize", help="Training batch size", default=32) 14 | parser.add_argument("--epochs", help="num epochs", default=10) 15 | parser.add_argument("--tensorboard", help="enable tensorboard", action='store_true') 16 | parser.add_argument("model") 17 | args = parser.parse_args() 18 | 19 | filename = args.model 20 | 21 | #TODO: Hyperparameter. 
22 | #This is the maxmimum length of our query string 23 | input_len = 30 24 | context_len = 5 25 | max_qstringlen = 140 26 | 27 | agent = dqnagent.Agent(input_len=input_len, context_len=context_len) 28 | agent.load(filename) 29 | 30 | if args.supervise != None: 31 | types = args.supervise.split(',') 32 | for rounds in range(500): 33 | contents = [] 34 | [contents.extend(v(int(args.trainscale))) for k,v in supervising_types.items() if k in types] 35 | if not contents: 36 | print("cannot supervise on types: {0}\nvalid types are {1}".format( 37 | types, supervising_types.keys())) 38 | sys.exit(1) 39 | print("-" * 20) 40 | print("\tRound " + str(rounds)) 41 | agent.train_batch(contents, int(args.batchsize), int(args.epochs), args.tensorboard) 42 | agent.save(filename) 43 | 44 | print("Finished supervising.") 45 | agent.save(filename) 46 | else: 47 | table = state.State() 48 | failed_attempts = {} 49 | queries = set() 50 | while True: 51 | #Iterate through the state table and try to add on to each item in there (plus the empty string) 52 | for context in table: 53 | value = table.value() 54 | 55 | qstring = "" 56 | input_state = "" 57 | experience = dqnagent.Experience() 58 | #Predict a character until it produces an end-of-string character (|) or it reaches the max length 59 | while not qstring.endswith("|") and len(qstring) < max_qstringlen: 60 | #Shrink our query down to length input_len 61 | input_state = qstring[-input_len:] 62 | attempts = [] 63 | if (input_state, context) in failed_attempts: 64 | attempts = failed_attempts[input_state, context] 65 | action = agent.act(input_state, context, attempts) 66 | experience.add(input_state, context, attempts, action) 67 | qstring += action 68 | 69 | #Remove the trailing bar, it's not actually supposed to be sent 70 | chopped = qstring 71 | if qstring.endswith("|"): 72 | chopped = qstring[:-1] 73 | 74 | # Is this a repeat or blank? 
75 | repeat = qstring in queries or (chopped.split("%")[0][-1:] == "'") 76 | queries.add(qstring) 77 | 78 | success = False 79 | if not repeat: 80 | #Perform the action 81 | param = {"user_id": chopped} 82 | req = requests.get("http://127.0.0.1:5000/v0/sqli/select", params=param) 83 | success = req.status_code == 200 and (len(req.text) >2) 84 | 85 | #If the query was successful, update the state table 86 | if success: 87 | print("Got a hit!", qstring) 88 | lastchar = chopped.split("%")[0][-1:] 89 | table.update(context+lastchar) 90 | #Find out what reward we received 91 | value_new = table.value() 92 | reward = value_new - value 93 | 94 | #Learn from how that action performed 95 | # attempts = [] 96 | # if (input_state, context) in failed_attempts: 97 | # attempts = failed_attempts[input_state, context] 98 | # agent.train_single(qstring[-1], context, input_state, attempts, reward) 99 | #agent.train_experience(experience, success) 100 | 101 | else: 102 | # Add the character we just tried to the list of failures 103 | # So that we can use it as input in later attempts 104 | lastchar = chopped.split("%")[0][-1:] 105 | guess_state = chopped.split("%")[0][:-1][-input_len:] 106 | print("Incorrect: ", qstring) 107 | if (guess_state, context) in failed_attempts: 108 | failures = failed_attempts[guess_state, context] 109 | if lastchar not in failures: 110 | failures.extend(lastchar) 111 | failed_attempts[guess_state, context] = failures 112 | # Add this character to the list of failures 113 | # Unless this was just a repeat. 
In which case ignore it 114 | elif not repeat: 115 | failed_attempts[guess_state, context] = [lastchar] 116 | -------------------------------------------------------------------------------- /docs/man/man1/python_truss.1.ronn: -------------------------------------------------------------------------------- 1 | DeepHack 2 | ==== 3 | 4 | ## SYNOPSIS 5 | 6 | `make virtualenv-develop` 7 | 8 | `~/.virtualenvs/vulnserver/bin/vulnserver` 9 | 10 | `curl 127.0.0.1:5000/v0/sqli/select?user_id=1` 11 | 12 | ## Dependencies 13 | 14 | sudo apt install virtualenv libpq-dev 15 | sudo pip3 install keras tensorflow 16 | 17 | ## DESCRIPTION 18 | 19 | DeepHack 20 | 21 | ## COMMAND LINE OPTIONS 22 | 23 | * `--example` : 24 | This is an example action, replace it with a real argument! 25 | 26 | ## EXAMPLES 27 | 28 | Setup a python virtualenv to do development work 29 | 30 | $ make virtualenv-develop 31 | 32 | Create an LXC and install the module into it for development 33 | 34 | $ make lxc-develop 35 | 36 | Create an LXC and install the project into it for production 37 | 38 | $ make lxc 39 | -------------------------------------------------------------------------------- /dqnagent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import os 4 | import json 5 | import string 6 | from collections import deque 7 | 8 | import keras 9 | from keras.models import Sequential, Model 10 | from keras.layers import LSTM, Dense, Activation, Flatten, Concatenate, Input 11 | from keras.optimizers import Adam 12 | from keras.callbacks import TensorBoard 13 | 14 | """An 'experience' consists of a grouping of the previous states and actions we took""" 15 | class Experience(): 16 | def __init__(self): 17 | self.list = [] 18 | 19 | def add(self, qstring, context, attempts, action): 20 | self.list.append({"qstring":qstring, "context":context, "attempts":attempts, "action":action}) 21 | 22 | """This defines the shape of the model 
class Agent():
    """Character-level policy network that proposes the next query character.

    This defines the shape of the model we're building in a mostly
    declarative way.  Three inputs are fed to the network:

    * ``context_input``  - one-hot encoded context string (``context_len`` chars)
    * ``qstring_input``  - one-hot encoded tail of the query built so far
      (``input_len`` chars)
    * ``attempts_input`` - multi-hot vector of characters that already failed

    The output is a softmax over the agent's character set.
    """

    def __init__(self, input_len, context_len):
        """Build and compile the Keras model.

        input_len: number of query-string characters fed to the model.
        context_len: number of context characters fed to the model.
        """
        chars = list(string.ascii_lowercase + string.digits + "=.;_ '()|%")
        print("Valid character set: ", chars)

        self.char_depth = len(chars)

        # Two-way mapping between characters and their one-hot positions
        self.char_indices = {c: i for i, c in enumerate(chars)}
        self.indices_char = {i: c for i, c in enumerate(chars)}

        # Build the model.  Layer names are kept stable because saved weight
        # files are loaded back into this exact topology.
        context_input = Input(shape=(context_len, self.char_depth), name='context_input')
        context_layer_1 = LSTM(32, name='context_layer_1')(context_input)
        context_layer_3 = Dense(128, name='context_layer_3')(context_layer_1)

        qstring_input = Input(shape=(input_len, self.char_depth), name='qstring_input')
        qstring_layer_1 = LSTM(128, name='qstring_layer_1')(qstring_input)
        qstring_layer_3 = Dense(128, name='qstring_layer_3')(qstring_layer_1)

        attempts_input = Input(shape=(self.char_depth,), name='attempts_input')
        attempts_layer_1 = Dense(128, name='attempts_layer_1')(attempts_input)

        x = keras.layers.concatenate([context_layer_3, qstring_layer_3])
        x = Dense(64, name='hidden_layer_1')(x)
        x = keras.layers.concatenate([x, attempts_layer_1])

        # The softmax output layer sits on top of the merged branches
        main_output = Dense(self.char_depth, activation='softmax', name="output_layer")(x)
        self.model = Model(inputs=[context_input, qstring_input, attempts_input], outputs=[main_output])
        # summary() prints the table itself and returns None, so it is not
        # wrapped in print() (that would emit a stray "None" line).
        self.model.summary()

        # "Compile" the model
        # XXX: Hyperparameters here. May need tinkering.
        self.supervised_reward = 1
        self.content = None
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        self.temperature = .2
        # We want a pretty steep dropoff here
        self.epsilon = .65
        self.input_len = input_len
        self.context_len = context_len

    def act(self, qstring, context, attempts):
        """Given the current state, choose an action and return it.

        Stochastic: the model's output is treated as a probability
        distribution and one character is sampled from it.

        qstring: query string so far (at most ``input_len`` characters).
        context: context string (at most ``context_len`` characters).
        attempts: characters that already failed for this state.
        Returns the chosen character.
        """
        qstring = self.onehot(qstring, self.input_len)
        context = self.onehot(context, self.context_len)
        attempts = self.encodeattempts(attempts)
        # Input order: [context_layer, qstring_layer, attempts_layer]
        predictions = self.model.predict_on_batch([context, qstring, attempts])
        action = self.sample(predictions[0], self.temperature)
        return self.indices_char[action]

    def sample(self, preds, temperature=1.0):
        """Draw one index from a probability array.

        preds: array-like of probabilities.
        temperature: how much the distribution is "smeared"; small values
            sharpen it towards the most likely entry, large values flatten it.
        Returns the sampled index.
        """
        preds = np.asarray(preds).astype('float64')
        with np.errstate(divide='ignore'):
            # log(0) == -inf for zero-probability entries; exp() below maps
            # that back to 0, so the divide warning is just noise.
            preds = np.log(preds) / temperature
            exp_preds = np.exp(preds)
            preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    def train_single(self, action, context, prev_state, attempts, reward):
        """Update the model for a single given experience.

        The training target is an array over the character set that is zero
        everywhere except at ``action``, where it holds ``reward``.
        """
        action = self.char_indices[action]
        prev_state = self.onehot(prev_state, self.input_len)
        reward_array = np.zeros((1, self.char_depth))
        reward_array[0, action] = reward
        attempts = self.encodeattempts(attempts)
        context = self.onehot(context, self.context_len)
        # Input order: [context_layer, qstring_layer, attempts_layer]
        self.model.train_on_batch([context, prev_state, attempts], reward_array)

    def train_batch(self, contents, batch_size, epochs, tensorboard=False):
        """Fit the model on a batch sampled (with replacement) from ``contents``.

        contents: sequence of dicts with the keys "qstring", "context",
            "attempts", "action" and "success".
        batch_size, epochs: forwarded to ``model.fit``.
        tensorboard: when true, a TensorBoard callback logging to
            ``./TensorBoard`` is attached.
        """
        self.content = contents
        count = len(self.content)

        qstrings = np.zeros((count, self.input_len, self.char_depth))
        contexts = np.zeros((count, self.context_len, self.char_depth))
        attempts = np.zeros((count, self.char_depth))
        rewards = np.zeros((count, self.char_depth))

        for i in range(count):
            entry = random.choice(self.content)

            qstrings[i] = self.onehot(entry["qstring"], self.input_len)
            contexts[i] = self.onehot(entry["context"], self.context_len)
            attempts[i] = self.encodeattempts(entry["attempts"])
            reward = self.supervised_reward
            if not entry["success"]:
                reward *= -0.1
            # Row i is all zeros except for the reward at the taken action
            rewards[i, self.char_indices[entry["action"]]] = reward

        callbacks = []
        if tensorboard:
            callbacks.append(TensorBoard(log_dir='./TensorBoard', histogram_freq=1, write_graph=True))
        self.model.fit([contexts, qstrings, attempts], rewards, callbacks=callbacks,
                       epochs=epochs, batch_size=batch_size, verbose=1, validation_split=0.2)

    def train_experience(self, experience, success):
        """Given a full experience, go back and reward it appropriately.

        Steps are replayed from the most recent action backwards.  The
        caller's ``experience.list`` is left untouched (it is iterated with
        ``reversed`` rather than reversed in place).
        """
        reward = self.supervised_reward
        if not success:
            reward *= -0.1
        for i, item in enumerate(reversed(experience.list), start=1):
            # NOTE(review): this block's nesting was reconstructed from a
            # whitespace-collapsed listing - confirm that the decay belongs
            # inside the ``i > 4`` branch.
            if i > 4:
                if item["action"] == "'":
                    return
                reward *= self.epsilon
            self.train_single(item["action"], item["context"], item["qstring"], item["attempts"], reward)

    def onehot(self, string, length):
        """Encode a string as a one-hot array of shape (1, length, char_depth).

        The string is right-padded with "|" up to ``length`` characters.
        Raises AssertionError when the string is longer than ``length`` and
        KeyError for characters outside the agent's character set.
        """
        assert len(string) <= length
        # First, pad the string out to be 'length' long
        string = string.ljust(length, "|")

        # Builtin bool: the ``np.bool`` alias was removed in NumPy 1.24
        output = np.zeros((1, length, self.char_depth), dtype=bool)
        for index, item in enumerate(string):
            output[0, index, self.char_indices[item]] = 1
        return output

    def encodeattempts(self, attempts):
        """Encode failed characters as a multi-hot array of shape (1, char_depth).

        Each attempted character sets the slot at its own character index.
        Entries that are not in the character set (for example an empty
        string) are skipped.

        NOTE(review): the slot used to be chosen by list position, which only
        encoded how many attempts had failed; weights trained against that
        encoding may need retraining.
        """
        output = np.zeros((1, self.char_depth))
        for item in attempts:
            index = self.char_indices.get(item)
            if index is not None:
                output[0, index] = 1
        return output

    def save(self, path):
        """Write the model weights to ``path``."""
        self.model.save_weights(path)
        print("Saved model to disk")

    def load(self, path):
        """Load model weights from ``path`` if that file exists."""
        if os.path.isfile(path):
            self.model.load_weights(path)
            print("Loaded model from disk")
        else:
            print("No model on disk, starting fresh")
https://raw.githubusercontent.com/BishopFox/deephack/3a0b837130cbfa000c4f3a33798affd93dc31d17/models/postgresv-v1.model -------------------------------------------------------------------------------- /models/sqlite-v1.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BishopFox/deephack/3a0b837130cbfa000c4f3a33798affd93dc31d17/models/sqlite-v1.model -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PROGRAM_NAME="python_truss" 4 | DEV_DEPS="python-virtualenv python3 python3-dev postgresql-server-dev-all" 5 | SYSTEM_DEPS="postgresql python-psychopg2 postgresql-server-dev-all" 6 | ALL_DEPS=$(echo $DEV_DEPS $SYSTEM_DEPS | tr ' ' ',' | tr '\n' ',') 7 | 8 | # get current directory 9 | SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 10 | # strip current dir to get to the root of the project directory 11 | SRC_DIR=${SCRIPTS_DIR%$(basename $SCRIPTS_DIR)} 12 | 13 | CONTAINER_NAME='deephack.lxc' 14 | 15 | lxc_config="/var/lib/lxc/$CONTAINER_NAME/config" 16 | lxc_rootfs="/var/lib/lxc/$CONTAINER_NAME/rootfs/" 17 | 18 | Lxc_create() { 19 | name=$1 20 | if [[ -z $name ]]; then 21 | cat <