├── LICENSE ├── README.md ├── aggregate_data_by_addresses.ipynb ├── transaction_analysis.ipynb ├── transaction_analysis_huge_ethereum_mixer.ipynb └── transaction_analysis_huge_ethereum_mixer_171125.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 cyber • Fund 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataScience 2 | 3 | ## Huge Ethereum Mixer 4 | 5 | Paper here: https://blog.cyber.fund/huge-ethereum-mixer-6cf98680ee6c 6 | 7 | For working with the ipython3 notebook you need to: 8 | 1. Install and synchronize Parity 1.6.10 (the synced Parity DB is no less than 24 GB) 9 | 2. 
Launch ethdrain.py (https://github.com/cyberFund/ethdrain) with the following parameters: 10 | python3 ethdrain.py -o csv 11 | 3. Copy "transactions.csv" to the directory of the ipython3 notebook 12 | 4. Run all in transaction_analysis_huge_ethereum_mixer.ipynb 13 | -------------------------------------------------------------------------------- /aggregate_data_by_addresses.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Import libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np\n", 20 | "from datetime import datetime, timedelta" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Load the csv file with transactions and convert the format of 3 fields" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "myfile = 'transactions_2.csv'\n", 39 | "transactions = pd.read_csv(myfile, sep=';', header=0)\n", 40 | "transactions.loc[:,'blockTimestamp'] = pd.to_datetime(transactions['blockTimestamp'])\n", 41 | "transactions.loc[:,'gas'] = transactions['gas'].map(lambda x: int(x,16))\n", 42 | "transactions.loc[:,'gasPrice'] = transactions['gasPrice'].map(lambda x: int(x,16))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Aggregate data for \"from\" addresses" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stderr", 59 | "output_type": "stream", 60 | "text": [ 61 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:5: FutureWarning: using a dict on a Series for aggregation\n", 62 | "is deprecated and will be 
removed in a future version\n", 63 | " \"\"\"\n", 64 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:10: FutureWarning: using a dict on a Series for aggregation\n", 65 | "is deprecated and will be removed in a future version\n", 66 | " # Remove the CWD from sys.path while we load stuff.\n", 67 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:15: FutureWarning: using a dict on a Series for aggregation\n", 68 | "is deprecated and will be removed in a future version\n", 69 | " from ipykernel import kernelapp as app\n", 70 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:21: FutureWarning: using a dict on a Series for aggregation\n", 71 | "is deprecated and will be removed in a future version\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "address_from_blockTimestamp = transactions.groupby('from')['blockTimestamp'].agg({'count':np.count_nonzero, \n", 77 | " 'delta_timestamp': lambda x: \n", 78 | " (np.max(x) - np.min(x)).total_seconds(),\n", 79 | " 'min_timestamp':np.min,\n", 80 | " 'max_timestamp':np.max}).reset_index()\n", 81 | "\n", 82 | "address_from_value = transactions.groupby('from')['value'].agg({'count_value_non_zero':np.count_nonzero, \n", 83 | " 'mean_value':np.mean, \n", 84 | " 'max_value':np.max, \n", 85 | " 'sum_value':np.sum}).reset_index()\n", 86 | "\n", 87 | "address_from_gas = transactions.groupby('from')['gas'].agg({'count_non_zero_gas':np.count_nonzero, \n", 88 | " 'min_gas':np.min, 'mean_gas':np.mean, \n", 89 | " 'max_gas':np.max, \n", 90 | " 'sum_gas':np.sum}).reset_index()\n", 91 | "\n", 92 | "address_from_gasPrice = transactions.groupby('from')['gasPrice'].agg({'count_non_zero_gasPrise':np.count_nonzero, \n", 93 | " 'min_gasPrice':np.min, \n", 94 | " 'mean_gasPrice':np.mean, \n", 95 | " 'max_gasPrice':np.max, \n", 96 | " 'sum_gasPrice':np.sum}).reset_index()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Aggregate of data for addresses \"to\"" 104 
| ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 9, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stderr", 113 | "output_type": "stream", 114 | "text": [ 115 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:2: FutureWarning: using a dict on a Series for aggregation\n", 116 | "is deprecated and will be removed in a future version\n", 117 | " \n", 118 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:8: FutureWarning: using a dict on a Series for aggregation\n", 119 | "is deprecated and will be removed in a future version\n", 120 | " \n", 121 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:14: FutureWarning: using a dict on a Series for aggregation\n", 122 | "is deprecated and will be removed in a future version\n", 123 | " \n", 124 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:20: FutureWarning: using a dict on a Series for aggregation\n", 125 | "is deprecated and will be removed in a future version\n", 126 | "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:22: FutureWarning: using a dict on a Series for aggregation\n", 127 | "is deprecated and will be removed in a future version\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "address_to_blockTimestamp = transactions.groupby('to')['blockTimestamp'].agg({'count':np.count_nonzero, \n", 133 | " 'delta_timestamp': lambda x: \n", 134 | " (np.max(x) - np.min(x)).total_seconds(),\n", 135 | " 'min_timestamp':np.min,\n", 136 | " 'max_timestamp':np.max}).reset_index()\n", 137 | "\n", 138 | "address_to_value = transactions.groupby('to')['value'].agg({'count_value_non_zero':np.count_nonzero, \n", 139 | " 'mean_value':np.mean, \n", 140 | " 'max_value':np.max, \n", 141 | " 'sum_value':np.sum}).reset_index()\n", 142 | "\n", 143 | "address_to_gas = transactions.groupby('to')['gas'].agg({'count_non_zero_gas':np.count_nonzero, \n", 144 | " 'min_gas':np.min, \n", 145 | " 'mean_gas':np.mean, \n", 146 | " 
'max_gas':np.max, \n", 147 | " 'sum_gas':np.sum}).reset_index()\n", 148 | "\n", 149 | "address_to_gasPrice = transactions.groupby('to')['gasPrice'].agg({'count_non_zero_gasPrise':np.count_nonzero, \n", 150 | " 'min_gasPrice':np.min, \n", 151 | " 'mean_gasPrice':np.mean, \n", 152 | " 'max_gasPrice':np.max, \n", 153 | " 'sum_gasPrice':np.sum}).reset_index()\n", 154 | "\n", 155 | "address_is_contract = transactions.groupby('to')['input'].agg({'is_contract': lambda x: \n", 156 | " False if len(np.unique(x))>1 \n", 157 | " or np.unique(x)=='0x' else True}).reset_index()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Merging aggregated DataFrames to one.
\n", 165 | "You may uncoment rows for outer-join" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "address_from = pd.merge(pd.merge(pd.merge(address_from_blockTimestamp, \n", 177 | " address_from_value,\n", 178 | " on='from'),\n", 179 | " address_from_gas,\n", 180 | " on='from'),\n", 181 | " address_from_gasPrice,\n", 182 | " on='from')\n", 183 | "\n", 184 | "address_to = pd.merge(pd.merge(pd.merge(pd.merge(address_to_blockTimestamp, \n", 185 | " address_to_value,\n", 186 | " on='to'),\n", 187 | " address_is_contract,\n", 188 | " on='to'),\n", 189 | " address_to_gas,\n", 190 | " on='to'),\n", 191 | " address_to_gasPrice,\n", 192 | " on='to')\n", 193 | "\n", 194 | "addresses_analysis = pd.merge(address_from, \n", 195 | " address_to,\n", 196 | "# how='outer', \n", 197 | " left_on='from', \n", 198 | " right_on='to', \n", 199 | " suffixes=('_from', '_to'))\n", 200 | "\n", 201 | "# addresses_to_analysis.fillna(0, inplace=True)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Calculating of address field" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 12, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "def address_function(x):\n", 220 | " return x['from'] if x['from']==x['from'] else x['to']\n", 221 | "\n", 222 | "addresses_analysis.loc[:,'address'] = addresses_analysis.apply(address_function, axis=1)\n", 223 | "addresses_analysis.drop(['from','to'], inplace=True, axis=1)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Save results" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 13, 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | 
"addresses_analysis.to_csv('addresses_analysis.csv')\n", 242 | "# address_to.to_csv('addresses_to.csv')\n", 243 | "# address_from.to_csv('addresses_from.csv')" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 5, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "ename": "NameError", 253 | "evalue": "name 'address_to_blockTimestamp' is not defined", 254 | "output_type": "error", 255 | "traceback": [ 256 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 257 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 258 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0maddress_to_blockTimestamp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 259 | "\u001b[0;31mNameError\u001b[0m: name 'address_to_blockTimestamp' is not defined" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "address_to_blockTimestamp" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "outputs": [], 274 | "source": [] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 3", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.5.2" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 2 298 | } 299 | --------------------------------------------------------------------------------