├── pyproject.toml
├── .gitignore
├── setup.py
├── LICENSE
├── README.md
└── src
    └── dune_analytics
        └── __init__.py

/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = [
    "setuptools>=42",
    "wheel"
]
build-backend = "setuptools.build_meta"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="dune_analytics",
    version="0.0.1",
    author="Justin Martin",
    author_email="justin@jmart.me",
    description="An interface for querying Dune Analytics",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/thefrozenfire/dune-analytics",
    project_urls={
        "Bug Tracker": "https://github.com/thefrozenfire/dune-analytics/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
    install_requires=['gql[requests]'],  # the client uses gql's requests transport
)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright © 2021 Justin Martin

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Dune Analytics

This is a client that uses the GraphQL interface supplied by Dune Analytics to run queries and load their results
into memory. It can be used, for instance, to pull data from Dune Analytics into a Pandas DataFrame for more complex analysis.

> **Disclaimer:** This package is not in any respect developed or endorsed by Dune Analytics, nor is the maintainer
> associated with Dune Analytics in any way.

[![pyversion][pyversion-image]][pyversion-url]
[![pypi][pypi-image]][pypi-url]

Tip me at [0xD660994dfD06A7d33C779E77bBd7D71b3C9C6AeA](https://etherscan.io/address/0xD660994dfD06A7d33C779E77bBd7D71b3C9C6AeA)

## Installation

    $ pip install --pre dune_analytics

## Usage

### Basic Usage

```python
from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

results = dune.query('''
SELECT
    *
FROM tornado_cash."eth_call_withdraw"
LIMIT 100
''')
```
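
### Loading results into a DataFrame

`Dune.query` returns a generator that yields one item per result row, so its output can be handed to Pandas. The sketch below is illustrative rather than part of the package: it assumes each row comes back as a plain dict of column values, and that `pandas` is installed separately (it is not a dependency of `dune_analytics`).

```python
import pandas as pd

from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

rows = dune.query('''
SELECT
    *
FROM tornado_cash."eth_call_withdraw"
LIMIT 100
''')

# Materialise the generator of rows into a DataFrame for further analysis
df = pd.DataFrame(list(rows))
```

### Exploring the schema

The client also exposes `list_tables` and `list_columns` for discovering what can be queried. Both are generators; `list_tables` pages through the schema listing and sleeps between pages to avoid hammering Dune's free service. A short sketch:

```python
from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

# Find tables whose full name contains "tornado_cash"
for table in dune.list_tables(name_filter="tornado_cash"):
    print(table["schema"], table["table"])

# List the columns of one of those tables
for column in dune.list_columns("tornado_cash", "eth_call_withdraw"):
    print(column["name"], column["data_type"])
```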

## License

[MIT License](https://github.com/thefrozenfire/dune-analytics/blob/master/LICENSE)

--------------------------------------------------------------------------------
/src/dune_analytics/__init__.py:
--------------------------------------------------------------------------------
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import time

class Dune:
    def __init__(self, username=None, password=None, transport=None):

        if transport is None:
            transport = RequestsHTTPTransport(url="https://core-hsr.duneanalytics.com/v1/graphql")

        self.client = Client(transport=transport, fetch_schema_from_transport=True)

        if username and password:
            self.authenticate(username, password)

    def authenticate(self, username, password):
        transport = self.client.transport

        # Log in the same way the web app does: fetch a CSRF token, then post the credentials
        csrf = transport.session.post("https://duneanalytics.com/api/auth/csrf").json()['csrf']
        r_auth = transport.session.post("https://duneanalytics.com/api/auth", data={
            "csrf": csrf,
            "action": "login",
            "username": username,
            "password": password
        }, headers={
            "Origin": "https://duneanalytics.com",
            "Referer": "https://duneanalytics.com/auth/login"
        }, allow_redirects=False)

        self.refresh_session()

    def refresh_session(self):
        # Pick up the session token and attach it as a bearer token for GraphQL requests
        r_session = self.client.transport.session.post("https://duneanalytics.com/api/auth/session")
        dune_session = r_session.json()

        if 'token' in dune_session:
            self.sub = dune_session['sub']
            self.client.transport.session.headers['Authorization'] = "Bearer " + dune_session['token']

            self.user = self.find_session_user()

    def find_session_user(self):
        gql_query = gql("""
            query FindSessionUser($sub: uuid!) {
                users(where: {
                    private_info: {
                        cognito_id: {_eq: $sub}
                    }
                }) { ...SessionUser __typename }
            }
            fragment SessionUser on users {
                id name profile_image_url memberships {
                    group { ...Group __typename }
                    __typename
                } __typename
            }
            fragment Group on groups { id type permissions __typename }
        """)

        result = self.client.execute(gql_query, operation_name="FindSessionUser", variable_values={"sub": self.sub})
        return result['users'][0]

    def query(self, query):
        # Refresh the session, save the SQL as a temporary query, execute it,
        # wait for the job to finish, then stream the result rows.
        self.refresh_session()
        new_query = self.upsert_query(query)

        job_id = self.execute_query(new_query['id'])
        self.wait_for_job(job_id)

        return self.find_result_data_by_job(job_id)

    def list_tables(self, name_filter=None, dataset_id=4, limit=50):
        gql_query = gql('''
            query ListSchemas($dataset_id: Int!, $query: [blockchain_schemas_bool_exp], $offset: Int!, $limit: Int!) {
                blockchain_schemas(
                    where: {dataset_id: {_eq: $dataset_id}, _and: $query}
                    order_by: [{schema: asc}, {table: asc}]
                    distinct_on: [schema, table]
                    offset: $offset
                    limit: $limit
                ) {
                    schema
                    table
                    __typename
                }
            }
        ''')

        query = []
        offset = 0

        if limit > 10000:
            raise Exception("Please don't abuse Dune's free service")

        if name_filter is not None:
            query.append({
                "full_name": {
                    "_ilike": '%' + name_filter + '%'
                }
            })

        # Page through the schema listing until Dune returns an empty page
        while True:
            result = self.client.execute(gql_query, variable_values={
                "dataset_id": dataset_id,
                "limit": limit,
                "query": query,
                "offset": offset
            })

            if len(result['blockchain_schemas']) == 0:
                break

            for table in result['blockchain_schemas']:
                yield {"schema": table['schema'], "table": table['table']}

            offset += limit
            time.sleep(1)  # Let's be gentle. Dune is growing

    def list_columns(self, schema, table, dataset_id=4, limit=50):
        gql_query = gql('''
            query ListColumns($dataset_id: Int!, $schema: String!, $table: String!, $limit: Int!) {
                blockchain_schemas(
                    where: {dataset_id: {_eq: $dataset_id}, schema: {_eq: $schema}, table: {_eq: $table}}
                    order_by: {column_name: asc}
                    limit: $limit
                ) {
                    column_name
                    data_type
                    __typename
                }
            }
        ''')

        result = self.client.execute(gql_query, variable_values={
            "schema": schema,
            "table": table,
            "dataset_id": dataset_id,
            "limit": limit
        })

        for column in result['blockchain_schemas']:
            yield {"name": column['column_name'], "data_type": column['data_type']}

    def upsert_query(self, query, dataset_id=4):
        gql_query = gql("""
            mutation UpsertQuery($session_id: Int!, $object: queries_insert_input!, $on_conflict: queries_on_conflict!, $favs_last_24h: Boolean! = false, $favs_last_7d: Boolean! = false, $favs_last_30d: Boolean! = false, $favs_all_time: Boolean! = true) {
                insert_queries_one(object: $object, on_conflict: $on_conflict) {
                    ...Query
                    favorite_queries(where: {user_id: {_eq: $session_id}}, limit: 1) {
                        created_at
                        __typename
                    }
                    __typename
                }
            }

            fragment Query on queries {
                id
                dataset_id
                name
                description
                query
                private_to_group_id
                is_temp
                is_archived
                created_at
                updated_at
                schedule
                tags
                parameters
                user {
                    ...User
                    __typename
                }
                visualizations {
                    id
                    type
                    name
                    options
                    created_at
                    __typename
                }
                favorite_queries_aggregate @include(if: $favs_all_time) {
                    aggregate {
                        count
                        __typename
                    }
                    __typename
                }
                query_favorite_count_last_24h @include(if: $favs_last_24h) {
                    favorite_count
                    __typename
                }
                query_favorite_count_last_7d @include(if: $favs_last_7d) {
                    favorite_count
                    __typename
                }
                query_favorite_count_last_30d @include(if: $favs_last_30d) {
                    favorite_count
                    __typename
                }
                __typename
            }

            fragment User on users {
                id
                name
                profile_image_url
                __typename
            }
        """)

        result = self.client.execute(gql_query, operation_name="UpsertQuery", variable_values={
            "favs_last_24h": False,
            "favs_last_7d": False,
            "favs_last_30d": False,
            "favs_all_time": False,
            "object": {
                "schedule": None,
                "dataset_id": dataset_id,
                "name": "Jupyter Temporary Query",
                "query": query,
                "user_id": self.user['id'],
                "description": "",
                "is_archived": False,
                "is_temp": True,
                "parameters": [],
                "visualizations": {
                    "data": [
                        {
                            "type": "table",
                            "name": "Query results",
                            "options": {}
                        }
                    ],
                    "on_conflict": {
                        "constraint": "visualizations_pkey",
                        "update_columns": [
                            "name",
                            "options"
                        ]
                    }
                }
            },
            "on_conflict": {
                "constraint": "queries_pkey",
                "update_columns": [
                    "dataset_id",
                    "name",
                    "description",
                    "query",
                    "schedule",
                    "is_archived",
                    "is_temp",
                    "tags",
                    "parameters"
                ]
            },
            "session_id": self.user['id']
        })

        return result['insert_queries_one']

    def execute_query(self, query_id, **kwargs):
        gql_query = gql("""
            mutation ExecuteQuery($query_id: Int!, $parameters: [Parameter!]!) {
                execute_query(query_id: $query_id, parameters: $parameters) {
                    job_id
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="ExecuteQuery", variable_values={
            "query_id": query_id,
            "parameters": [{"key": key, "type": "text", "value": value} for key, value in kwargs.items()]
        })

        return result['execute_query']['job_id']

    def find_result_job(self, job_id):
        gql_query = gql("""
            query FindResultJob($job_id: uuid) {
                jobs(where: {id: {_eq: $job_id}}) {
                    id
                    user_id
                    locked_until
                    created_at
                    category
                    __typename
                }
                view_queue_positions(where: {id: {_eq: $job_id}}) {
                    pos
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="FindResultJob", variable_values={
            "job_id": job_id
        })

        return result['jobs']

    def wait_for_job(self, job_id):
        # The job disappears from the queue once it has finished, so poll until it is gone
        while True:
            jobs = self.find_result_job(job_id)
            if len(jobs) == 0:
                break
            else:
                time.sleep(1)

    def find_result_data_by_job(self, job_id):
        gql_query = gql("""
            query FindResultDataByJob($job_id: uuid!) {
                query_results(where: {job_id: {_eq: $job_id}}) {
                    id
                    job_id
                    error
                    runtime
                    generated_at
                    columns
                    __typename
                }
                get_result_by_job_id(args: {want_job_id: $job_id}) {
                    data
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="FindResultDataByJob", variable_values={
            "job_id": job_id
        })

        # Yield the data for each row of the query result
        for item in result['get_result_by_job_id']:
            yield item['data']

--------------------------------------------------------------------------------