├── LICENSE
├── README.md
├── aggregate_data_by_addresses.ipynb
├── transaction_analysis.ipynb
├── transaction_analysis_huge_ethereum_mixer.ipynb
└── transaction_analysis_huge_ethereum_mixer_171125.ipynb
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 cyber • Fund
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DataScience
2 |
3 | ## Huge Ethereum Mixer
4 |
5 | Paper here: https://blog.cyber.fund/huge-ethereum-mixer-6cf98680ee6c
6 |
7 | For working with ipython3 notebook you need:
8 | 1. Install and synchronize Parity 1.6.10 (the synced Parity DB takes at least 24 GB)
9 | 2. Launch ethdrain.py (https://github.com/cyberFund/ethdrain) with the following parameters:
10 | python3 ethdrain.py -o csv
11 | 3. Copy "transactions.csv" into the directory of the ipython3 notebook
12 | 4. Run all in transaction_analysis_huge_ethereum_mixer.ipynb
13 |
--------------------------------------------------------------------------------
/aggregate_data_by_addresses.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Import libraries"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import pandas as pd\n",
19 | "import numpy as np\n",
20 | "from datetime import datetime, timedelta"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 |     "Load the CSV file with transactions and convert the format of 3 fields"
28 | ]
29 | },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 3,
33 |    "metadata": {
34 |     "collapsed": true
35 |    },
36 |    "outputs": [],
37 |    "source": [
38 |     "# Load the transactions dump (semicolon-separated CSV produced by ethdrain).\n",
39 |     "# NOTE(review): the README says to copy \"transactions.csv\" next to the\n",
40 |     "# notebook, but this cell reads 'transactions_2.csv' -- confirm filename.\n",
41 |     "myfile = 'transactions_2.csv'\n",
42 |     "transactions = pd.read_csv(myfile, sep=';', header=0)\n",
43 |     "# blockTimestamp arrives as text; gas and gasPrice arrive as hex strings,\n",
44 |     "# hence the base-16 integer conversion.\n",
45 |     "transactions.loc[:,'blockTimestamp'] = pd.to_datetime(transactions['blockTimestamp'])\n",
46 |     "transactions.loc[:,'gas'] = transactions['gas'].map(lambda x: int(x,16))\n",
47 |     "transactions.loc[:,'gasPrice'] = transactions['gasPrice'].map(lambda x: int(x,16))"
48 |    ]
49 |   },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 |     "Aggregate data by \"from\" (sender) address"
50 | ]
51 | },
52 |   {
53 |    "cell_type": "code",
54 |    "execution_count": 4,
55 |    "metadata": {},
56 |    "outputs": [],
57 |    "source": [
58 |     "# Per-sender statistics. Passing a {name: func} dict to SeriesGroupBy.agg\n",
59 |     "# is deprecated (removed in pandas >= 1.0), so each named statistic is\n",
60 |     "# computed with its own .agg call and assembled into one DataFrame.\n",
61 |     "grouped_ts = transactions.groupby('from')['blockTimestamp']\n",
62 |     "address_from_blockTimestamp = pd.DataFrame({\n",
63 |     "    'count': grouped_ts.agg(np.count_nonzero),\n",
64 |     "    # Active time span of the address, in seconds.\n",
65 |     "    'delta_timestamp': grouped_ts.agg(lambda x: (np.max(x) - np.min(x)).total_seconds()),\n",
66 |     "    'min_timestamp': grouped_ts.agg(np.min),\n",
67 |     "    'max_timestamp': grouped_ts.agg(np.max)}).reset_index()\n",
68 |     "\n",
69 |     "grouped_value = transactions.groupby('from')['value']\n",
70 |     "address_from_value = pd.DataFrame({\n",
71 |     "    'count_value_non_zero': grouped_value.agg(np.count_nonzero),\n",
72 |     "    'mean_value': grouped_value.agg(np.mean),\n",
73 |     "    'max_value': grouped_value.agg(np.max),\n",
74 |     "    'sum_value': grouped_value.agg(np.sum)}).reset_index()\n",
75 |     "\n",
76 |     "grouped_gas = transactions.groupby('from')['gas']\n",
77 |     "address_from_gas = pd.DataFrame({\n",
78 |     "    'count_non_zero_gas': grouped_gas.agg(np.count_nonzero),\n",
79 |     "    'min_gas': grouped_gas.agg(np.min),\n",
80 |     "    'mean_gas': grouped_gas.agg(np.mean),\n",
81 |     "    'max_gas': grouped_gas.agg(np.max),\n",
82 |     "    'sum_gas': grouped_gas.agg(np.sum)}).reset_index()\n",
83 |     "\n",
84 |     "# Column name 'count_non_zero_gasPrise' (sic) is kept so consumers of\n",
85 |     "# the saved CSV keep working.\n",
86 |     "grouped_gasPrice = transactions.groupby('from')['gasPrice']\n",
87 |     "address_from_gasPrice = pd.DataFrame({\n",
88 |     "    'count_non_zero_gasPrise': grouped_gasPrice.agg(np.count_nonzero),\n",
89 |     "    'min_gasPrice': grouped_gasPrice.agg(np.min),\n",
90 |     "    'mean_gasPrice': grouped_gasPrice.agg(np.mean),\n",
91 |     "    'max_gasPrice': grouped_gasPrice.agg(np.max),\n",
92 |     "    'sum_gasPrice': grouped_gasPrice.agg(np.sum)}).reset_index()"
93 |    ]
94 |   },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 |     "Aggregate data by \"to\" (recipient) address"
104 | ]
105 | },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 9,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "# Same per-address statistics as above, grouped by recipient ('to').\n",
113 |     "# Dict-of-{name: func} aggregation on a SeriesGroupBy is deprecated\n",
114 |     "# (removed in pandas >= 1.0), so each statistic gets its own .agg call.\n",
115 |     "grouped_ts = transactions.groupby('to')['blockTimestamp']\n",
116 |     "address_to_blockTimestamp = pd.DataFrame({\n",
117 |     "    'count': grouped_ts.agg(np.count_nonzero),\n",
118 |     "    'delta_timestamp': grouped_ts.agg(lambda x: (np.max(x) - np.min(x)).total_seconds()),\n",
119 |     "    'min_timestamp': grouped_ts.agg(np.min),\n",
120 |     "    'max_timestamp': grouped_ts.agg(np.max)}).reset_index()\n",
121 |     "\n",
122 |     "grouped_value = transactions.groupby('to')['value']\n",
123 |     "address_to_value = pd.DataFrame({\n",
124 |     "    'count_value_non_zero': grouped_value.agg(np.count_nonzero),\n",
125 |     "    'mean_value': grouped_value.agg(np.mean),\n",
126 |     "    'max_value': grouped_value.agg(np.max),\n",
127 |     "    'sum_value': grouped_value.agg(np.sum)}).reset_index()\n",
128 |     "\n",
129 |     "grouped_gas = transactions.groupby('to')['gas']\n",
130 |     "address_to_gas = pd.DataFrame({\n",
131 |     "    'count_non_zero_gas': grouped_gas.agg(np.count_nonzero),\n",
132 |     "    'min_gas': grouped_gas.agg(np.min),\n",
133 |     "    'mean_gas': grouped_gas.agg(np.mean),\n",
134 |     "    'max_gas': grouped_gas.agg(np.max),\n",
135 |     "    'sum_gas': grouped_gas.agg(np.sum)}).reset_index()\n",
136 |     "\n",
137 |     "# Column name 'count_non_zero_gasPrise' (sic) is kept so consumers of\n",
138 |     "# the saved CSV keep working.\n",
139 |     "grouped_gasPrice = transactions.groupby('to')['gasPrice']\n",
140 |     "address_to_gasPrice = pd.DataFrame({\n",
141 |     "    'count_non_zero_gasPrise': grouped_gasPrice.agg(np.count_nonzero),\n",
142 |     "    'min_gasPrice': grouped_gasPrice.agg(np.min),\n",
143 |     "    'mean_gasPrice': grouped_gasPrice.agg(np.mean),\n",
144 |     "    'max_gasPrice': grouped_gasPrice.agg(np.max),\n",
145 |     "    'sum_gasPrice': grouped_gasPrice.agg(np.sum)}).reset_index()\n",
146 |     "\n",
147 |     "# Heuristic as coded: an address is flagged as a contract only when all\n",
148 |     "# incoming txs share a single unique input payload and that payload is\n",
149 |     "# not plain '0x'; otherwise it is treated as a regular account.\n",
150 |     "grouped_input = transactions.groupby('to')['input']\n",
151 |     "address_is_contract = pd.DataFrame({\n",
152 |     "    'is_contract': grouped_input.agg(lambda x: False if len(np.unique(x)) > 1\n",
153 |     "                                     or np.unique(x) == '0x' else True)}).reset_index()"
154 |    ]
155 |   },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 |     "Merging aggregated DataFrames to one.\n",
165 |     "You may uncomment rows for an outer join"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 10,
171 | "metadata": {
172 | "collapsed": true
173 | },
174 | "outputs": [],
175 | "source": [
176 | "address_from = pd.merge(pd.merge(pd.merge(address_from_blockTimestamp, \n",
177 | " address_from_value,\n",
178 | " on='from'),\n",
179 | " address_from_gas,\n",
180 | " on='from'),\n",
181 | " address_from_gasPrice,\n",
182 | " on='from')\n",
183 | "\n",
184 | "address_to = pd.merge(pd.merge(pd.merge(pd.merge(address_to_blockTimestamp, \n",
185 | " address_to_value,\n",
186 | " on='to'),\n",
187 | " address_is_contract,\n",
188 | " on='to'),\n",
189 | " address_to_gas,\n",
190 | " on='to'),\n",
191 | " address_to_gasPrice,\n",
192 | " on='to')\n",
193 | "\n",
194 | "addresses_analysis = pd.merge(address_from, \n",
195 | " address_to,\n",
196 | "# how='outer', \n",
197 | " left_on='from', \n",
198 | " right_on='to', \n",
199 | " suffixes=('_from', '_to'))\n",
200 | "\n",
201 | "# addresses_to_analysis.fillna(0, inplace=True)"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 |     "Compute the unified address field"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 12,
214 | "metadata": {
215 | "collapsed": true
216 | },
217 | "outputs": [],
218 | "source": [
219 | "def address_function(x):\n",
220 | " return x['from'] if x['from']==x['from'] else x['to']\n",
221 | "\n",
222 | "addresses_analysis.loc[:,'address'] = addresses_analysis.apply(address_function, axis=1)\n",
223 | "addresses_analysis.drop(['from','to'], inplace=True, axis=1)"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "Save results"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 13,
236 | "metadata": {
237 | "collapsed": true
238 | },
239 | "outputs": [],
240 | "source": [
241 | "addresses_analysis.to_csv('addresses_analysis.csv')\n",
242 | "# address_to.to_csv('addresses_to.csv')\n",
243 | "# address_from.to_csv('addresses_from.csv')"
244 | ]
245 | },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": [
252 |     "# Spot-check the per-recipient timestamp aggregates; .head() avoids\n",
253 |     "# dumping the whole frame into the notebook. (The stale NameError that\n",
254 |     "# was recorded here came from running this cell before the aggregation\n",
255 |     "# cell; under Restart & Run All the name is defined.)\n",
256 |     "address_to_blockTimestamp.head()"
257 |    ]
258 |   },
267 | {
268 | "cell_type": "code",
269 | "execution_count": null,
270 | "metadata": {
271 | "collapsed": true
272 | },
273 | "outputs": [],
274 | "source": []
275 | }
276 | ],
277 | "metadata": {
278 | "kernelspec": {
279 | "display_name": "Python 3",
280 | "language": "python",
281 | "name": "python3"
282 | },
283 | "language_info": {
284 | "codemirror_mode": {
285 | "name": "ipython",
286 | "version": 3
287 | },
288 | "file_extension": ".py",
289 | "mimetype": "text/x-python",
290 | "name": "python",
291 | "nbconvert_exporter": "python",
292 | "pygments_lexer": "ipython3",
293 | "version": "3.5.2"
294 | }
295 | },
296 | "nbformat": 4,
297 | "nbformat_minor": 2
298 | }
299 |
--------------------------------------------------------------------------------