├── pyproject.toml
├── .gitignore
├── setup.py
├── LICENSE
├── README.md
└── src
    └── dune_analytics
        └── __init__.py

/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = [
    "setuptools>=42",
    "wheel"
]
build-backend = "setuptools.build_meta"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="dune_analytics",
    version="0.0.1",
    author="Justin Martin",
    author_email="justin@jmart.me",
    description="An interface for querying Dune Analytics",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/thefrozenfire/dune-analytics",
    project_urls={
        "Bug Tracker": "https://github.com/thefrozenfire/dune-analytics/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
    install_requires=['gql[requests]'],  # the client uses gql's requests transport
)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright © 2021 Justin Martin

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Dune Analytics

This is a client that uses the GraphQL interface supplied by Dune Analytics to run queries and load their results
into memory. It can be used, for instance, to pull data from Dune Analytics into a Pandas DataFrame for more complex analysis.

> **Disclaimer:** This package is not in any respect developed or endorsed by Dune Analytics, nor is the maintainer
> associated with Dune Analytics in any way.

[![pyversion][pyversion-image]][pyversion-url]
[![pypi][pypi-image]][pypi-url]

Tip me at [0xD660994dfD06A7d33C779E77bBd7D71b3C9C6AeA](https://etherscan.io/address/0xD660994dfD06A7d33C779E77bBd7D71b3C9C6AeA)

## Installation

    $ pip install --pre dune_analytics

## Usage

### Basic Usage

```python
from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

results = dune.query('''
SELECT
    *
FROM tornado_cash."eth_call_withdraw"
LIMIT 100
''')
```
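
### Loading results into a DataFrame

`Dune.query` returns a generator that yields one item per result row, so its output can be handed to Pandas. The sketch below is illustrative rather than part of the package: it assumes each row comes back as a plain dict of column values, and that `pandas` is installed separately (it is not a dependency of `dune_analytics`).

```python
import pandas as pd

from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

rows = dune.query('''
SELECT
    *
FROM tornado_cash."eth_call_withdraw"
LIMIT 100
''')

# Materialise the generator of rows into a DataFrame for further analysis
df = pd.DataFrame(list(rows))
```

### Exploring the schema

The client also exposes `list_tables` and `list_columns` for discovering what can be queried. Both are generators; `list_tables` pages through the schema listing and sleeps between pages to avoid hammering Dune's free service. A short sketch:

```python
from dune_analytics import Dune

dune = Dune(dune_username, dune_password)

# Find tables whose full name contains "tornado_cash"
for table in dune.list_tables(name_filter="tornado_cash"):
    print(table["schema"], table["table"])

# List the columns of one of those tables
for column in dune.list_columns("tornado_cash", "eth_call_withdraw"):
    print(column["name"], column["data_type"])
```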

## License

[MIT License](https://github.com/thefrozenfire/dune-analytics/blob/master/LICENSE)

--------------------------------------------------------------------------------
/src/dune_analytics/__init__.py:
--------------------------------------------------------------------------------
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import time

class Dune:
    def __init__(self, username=None, password=None, transport=None):

        if transport is None:
            transport = RequestsHTTPTransport(url="https://core-hsr.duneanalytics.com/v1/graphql")

        self.client = Client(transport=transport, fetch_schema_from_transport=True)

        if username and password:
            self.authenticate(username, password)

    def authenticate(self, username, password):
        transport = self.client.transport

        # Log in the same way the web app does: fetch a CSRF token, then post the credentials
        csrf = transport.session.post("https://duneanalytics.com/api/auth/csrf").json()['csrf']
        r_auth = transport.session.post("https://duneanalytics.com/api/auth", data={
            "csrf": csrf,
            "action": "login",
            "username": username,
            "password": password
        }, headers={
            "Origin": "https://duneanalytics.com",
            "Referer": "https://duneanalytics.com/auth/login"
        }, allow_redirects=False)

        self.refresh_session()

    def refresh_session(self):
        # Pick up the session token and attach it as a bearer token for GraphQL requests
        r_session = self.client.transport.session.post("https://duneanalytics.com/api/auth/session")
        dune_session = r_session.json()

        if 'token' in dune_session:
            self.sub = dune_session['sub']
            self.client.transport.session.headers['Authorization'] = "Bearer " + dune_session['token']

            self.user = self.find_session_user()

    def find_session_user(self):
        gql_query = gql("""
            query FindSessionUser($sub: uuid!) {
                users(where: {
                    private_info: {
                        cognito_id: {_eq: $sub}
                    }
                }) { ...SessionUser __typename }
            }
            fragment SessionUser on users {
                id name profile_image_url memberships {
                    group { ...Group __typename }
                    __typename
                } __typename
            }
            fragment Group on groups { id type permissions __typename }
        """)

        result = self.client.execute(gql_query, operation_name="FindSessionUser", variable_values={"sub": self.sub})
        return result['users'][0]

    def query(self, query):
        # Refresh the session, save the SQL as a temporary query, execute it,
        # wait for the job to finish, then stream the result rows.
        self.refresh_session()
        new_query = self.upsert_query(query)

        job_id = self.execute_query(new_query['id'])
        self.wait_for_job(job_id)

        return self.find_result_data_by_job(job_id)

    def list_tables(self, name_filter=None, dataset_id=4, limit=50):
        gql_query = gql('''
            query ListSchemas($dataset_id: Int!, $query: [blockchain_schemas_bool_exp], $offset: Int!, $limit: Int!) {
                blockchain_schemas(
                    where: {dataset_id: {_eq: $dataset_id}, _and: $query}
                    order_by: [{schema: asc}, {table: asc}]
                    distinct_on: [schema, table]
                    offset: $offset
                    limit: $limit
                ) {
                    schema
                    table
                    __typename
                }
            }
        ''')

        query = []
        offset = 0

        if limit > 10000:
            raise Exception("Please don't abuse Dune's free service")

        if name_filter is not None:
            query.append({
                "full_name": {
                    "_ilike": '%' + name_filter + '%'
                }
            })

        # Page through the schema listing until Dune returns an empty page
        while True:
            result = self.client.execute(gql_query, variable_values={
                "dataset_id": dataset_id,
                "limit": limit,
                "query": query,
                "offset": offset
            })

            if len(result['blockchain_schemas']) == 0:
                break

            for table in result['blockchain_schemas']:
                yield {"schema": table['schema'], "table": table['table']}

            offset += limit
            time.sleep(1)  # Let's be gentle. Dune is growing

    def list_columns(self, schema, table, dataset_id=4, limit=50):
        gql_query = gql('''
            query ListColumns($dataset_id: Int!, $schema: String!, $table: String!, $limit: Int!) {
                blockchain_schemas(
                    where: {dataset_id: {_eq: $dataset_id}, schema: {_eq: $schema}, table: {_eq: $table}}
                    order_by: {column_name: asc}
                    limit: $limit
                ) {
                    column_name
                    data_type
                    __typename
                }
            }
        ''')

        result = self.client.execute(gql_query, variable_values={
            "schema": schema,
            "table": table,
            "dataset_id": dataset_id,
            "limit": limit
        })

        for column in result['blockchain_schemas']:
            yield {"name": column['column_name'], "data_type": column['data_type']}

    def upsert_query(self, query, dataset_id=4):
        gql_query = gql("""
            mutation UpsertQuery($session_id: Int!, $object: queries_insert_input!, $on_conflict: queries_on_conflict!, $favs_last_24h: Boolean! = false, $favs_last_7d: Boolean! = false, $favs_last_30d: Boolean! = false, $favs_all_time: Boolean! = true) {
                insert_queries_one(object: $object, on_conflict: $on_conflict) {
                    ...Query
                    favorite_queries(where: {user_id: {_eq: $session_id}}, limit: 1) {
                        created_at
                        __typename
                    }
                    __typename
                }
            }

            fragment Query on queries {
                id
                dataset_id
                name
                description
                query
                private_to_group_id
                is_temp
                is_archived
                created_at
                updated_at
                schedule
                tags
                parameters
                user {
                    ...User
                    __typename
                }
                visualizations {
                    id
                    type
                    name
                    options
                    created_at
                    __typename
                }
                favorite_queries_aggregate @include(if: $favs_all_time) {
                    aggregate {
                        count
                        __typename
                    }
                    __typename
                }
                query_favorite_count_last_24h @include(if: $favs_last_24h) {
                    favorite_count
                    __typename
                }
                query_favorite_count_last_7d @include(if: $favs_last_7d) {
                    favorite_count
                    __typename
                }
                query_favorite_count_last_30d @include(if: $favs_last_30d) {
                    favorite_count
                    __typename
                }
                __typename
            }

            fragment User on users {
                id
                name
                profile_image_url
                __typename
            }
        """)

        result = self.client.execute(gql_query, operation_name="UpsertQuery", variable_values={
            "favs_last_24h": False,
            "favs_last_7d": False,
            "favs_last_30d": False,
            "favs_all_time": False,
            "object": {
                "schedule": None,
                "dataset_id": dataset_id,
                "name": "Jupyter Temporary Query",
                "query": query,
                "user_id": self.user['id'],
                "description": "",
                "is_archived": False,
                "is_temp": True,
                "parameters": [],
                "visualizations": {
                    "data": [
                        {
                            "type": "table",
                            "name": "Query results",
                            "options": {}
                        }
                    ],
                    "on_conflict": {
                        "constraint": "visualizations_pkey",
                        "update_columns": [
                            "name",
                            "options"
                        ]
                    }
                }
            },
            "on_conflict": {
                "constraint": "queries_pkey",
                "update_columns": [
                    "dataset_id",
                    "name",
                    "description",
                    "query",
                    "schedule",
                    "is_archived",
                    "is_temp",
                    "tags",
                    "parameters"
                ]
            },
            "session_id": self.user['id']
        })

        return result['insert_queries_one']

    def execute_query(self, query_id, **kwargs):
        gql_query = gql("""
            mutation ExecuteQuery($query_id: Int!, $parameters: [Parameter!]!) {
                execute_query(query_id: $query_id, parameters: $parameters) {
                    job_id
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="ExecuteQuery", variable_values={
            "query_id": query_id,
            "parameters": [{"key": key, "type": "text", "value": value} for key, value in kwargs.items()]
        })

        return result['execute_query']['job_id']

    def find_result_job(self, job_id):
        gql_query = gql("""
            query FindResultJob($job_id: uuid) {
                jobs(where: {id: {_eq: $job_id}}) {
                    id
                    user_id
                    locked_until
                    created_at
                    category
                    __typename
                }
                view_queue_positions(where: {id: {_eq: $job_id}}) {
                    pos
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="FindResultJob", variable_values={
            "job_id": job_id
        })

        return result['jobs']

    def wait_for_job(self, job_id):
        # The job disappears from the queue once it has finished, so poll until it is gone
        while True:
            jobs = self.find_result_job(job_id)
            if len(jobs) == 0:
                break
            else:
                time.sleep(1)

    def find_result_data_by_job(self, job_id):
        gql_query = gql("""
            query FindResultDataByJob($job_id: uuid!) {
                query_results(where: {job_id: {_eq: $job_id}}) {
                    id
                    job_id
                    error
                    runtime
                    generated_at
                    columns
                    __typename
                }
                get_result_by_job_id(args: {want_job_id: $job_id}) {
                    data
                    __typename
                }
            }
        """)

        result = self.client.execute(gql_query, operation_name="FindResultDataByJob", variable_values={
            "job_id": job_id
        })

        # Yield the data for each row of the query result
        for item in result['get_result_by_job_id']:
            yield item['data']

--------------------------------------------------------------------------------