├── .env.example ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── hashes.txt ├── pyproject.toml ├── requirements.txt └── src ├── core ├── config.py └── logger.py ├── main.py ├── main_speed.py ├── schemas └── parse_schemas.py └── services ├── google_sheets.py └── solscan_parser.py /.env.example: -------------------------------------------------------------------------------- 1 | PYTHONPATH=src 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | token.json 162 | credentials.json 163 | .python-version 164 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | exclude: '\.lock.*' 10 | - id: debug-statements 11 | 12 | - repo: https://github.com/astral-sh/uv-pre-commit 13 | rev: 0.1.27 14 | hooks: 15 | - id: pip-compile 16 | name: '[BUILD] Pip-compile' 17 | args: [requirements.txt, -o, requirements.txt] 18 | 19 | - repo: https://github.com/astral-sh/ruff-pre-commit 20 | rev: v0.3.4 21 | hooks: 22 | - id: ruff 23 | name: '[BE] Ruff' 24 | args: ['--fix', '--unsafe-fixes', '--exit-non-zero-on-fix'] 25 | 26 | - repo: https://github.com/psf/black 27 | rev: 24.3.0 28 | hooks: 29 | - id: black 30 | name: '[BE] Black formatter' 31 | 32 | - repo: https://github.com/pycqa/isort 33 | rev: 5.13.2 34 | hooks: 35 | - id: isort 36 | name: '[BE] Isort imports' 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Alexandr Mudrak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple Selenium Parser for Solscan with Google Sheets Integration 2 | 3 | This project is a Selenium-based scraper that collects data from Solscan and saves it to a Google Sheet. 4 | 5 | ## Prerequisites 6 | 7 | - Python 3.11+ 8 | 9 | ## Installation 10 | 11 | First, clone the repository to your local machine: 12 | 13 | ```bash 14 | git clone https://github.com/alexmudrak/solscan-parser.git 15 | cd solscan-parser 16 | ``` 17 | 18 | Install the dependencies: 19 | 20 | ```bash 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | ## Configuration 25 | 26 | Before running the application, you need to enable the Google Sheets API and download the `credentials.json` file: 27 | 28 | 1. Enable the Google Sheets API by visiting: 29 | [Enable Google Sheets API](https://console.cloud.google.com/apis/enableflow?apiid=sheets.googleapis.com) 30 | 31 | 2. Download the `credentials.json` file from: 32 | [Google API Credentials](https://console.cloud.google.com/apis/credentials) 33 | 34 | Place the `credentials.json` file in the root directory of the project. 
35 | 36 | ## Running the Application 37 | 38 | To run the application, use the following command: 39 | 40 | ```bash 41 | python src/main.py 42 | ``` 43 | 44 | ## Usage 45 | 46 | The application will navigate to the Solscan website, extract the required data, and then authenticate with Google to access and update the specified Google Sheet with the transaction data. 47 | -------------------------------------------------------------------------------- /hashes.txt: -------------------------------------------------------------------------------- 1 | # Add a new hash on a new line. Any leading or 2 | # trailing quotation marks will be removed 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pyright] 2 | executionEnvironments = [ 3 | { root = "./", extraPaths = [ "src" ] } 4 | ] 5 | 6 | [tool.black] 7 | line-length = 79 8 | 9 | [tool.isort] 10 | line_length = 79 11 | profile = "black" 12 | 13 | [tool.ruff] 14 | line-length = 79 15 | lint.fixable = ["ALL"] 16 | 17 | [tool.coverage.report] 18 | exclude_lines= [ 19 | "pragma: no cover", 20 | "def __repr__", 21 | "if __name__ == .__main__.:", 22 | ] 23 | 24 | omit = [ 25 | "*/__init__.py", 26 | ] 27 | 28 | [tool.coverage.run] 29 | source = ["src/*"] 30 | 31 | omit = [ 32 | "*/__init__.py", 33 | ] 34 | 35 | [tool.pytest.ini_options] 36 | pythonpath = "src" 37 | python_files = [ 38 | "tests.py", 39 | "test_*.py", 40 | "*_tests.py", 41 | ] 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile requirements.txt -o requirements.txt 3 | annotated-types==0.7.0 4 | # via pydantic 5 | attrs==25.1.0 6 | # via 7 | # outcome 8 | # trio 9 | cachetools==5.5.1 10 | # via google-auth 
11 | certifi==2024.12.14 12 | # via 13 | # requests 14 | # selenium 15 | charset-normalizer==3.4.1 16 | # via requests 17 | google-api-core==2.24.0 18 | # via google-api-python-client 19 | google-api-python-client==2.159.0 20 | google-auth==2.38.0 21 | # via 22 | # google-api-core 23 | # google-api-python-client 24 | # google-auth-httplib2 25 | # google-auth-oauthlib 26 | google-auth-httplib2==0.2.0 27 | # via google-api-python-client 28 | google-auth-oauthlib==1.2.1 29 | googleapis-common-protos==1.66.0 30 | # via google-api-core 31 | h11==0.14.0 32 | # via wsproto 33 | httplib2==0.22.0 34 | # via 35 | # google-api-python-client 36 | # google-auth-httplib2 37 | idna==3.10 38 | # via 39 | # requests 40 | # trio 41 | oauthlib==3.2.2 42 | # via requests-oauthlib 43 | outcome==1.3.0.post0 44 | # via trio 45 | proto-plus==1.25.0 46 | # via google-api-core 47 | protobuf==5.29.3 48 | # via 49 | # google-api-core 50 | # googleapis-common-protos 51 | # proto-plus 52 | pyasn1==0.6.1 53 | # via 54 | # pyasn1-modules 55 | # rsa 56 | pyasn1-modules==0.4.1 57 | # via google-auth 58 | pydantic==2.10.6 59 | # via pydantic-settings 60 | pydantic-core==2.27.2 61 | # via pydantic 62 | pydantic-settings==2.7.1 63 | pyparsing==3.2.1 64 | # via httplib2 65 | pysocks==1.7.1 66 | # via urllib3 67 | python-dotenv==1.0.1 68 | # via pydantic-settings 69 | requests==2.32.3 70 | # via 71 | # google-api-core 72 | # requests-oauthlib 73 | # undetected-chromedriver 74 | requests-oauthlib==2.0.0 75 | # via google-auth-oauthlib 76 | rsa==4.9 77 | # via google-auth 78 | selenium==4.28.1 79 | # via undetected-chromedriver 80 | sniffio==1.3.1 81 | # via trio 82 | sortedcontainers==2.4.0 83 | # via trio 84 | trio==0.28.0 85 | # via 86 | # selenium 87 | # trio-websocket 88 | trio-websocket==0.11.1 89 | # via selenium 90 | typing-extensions==4.12.2 91 | # via 92 | # pydantic 93 | # pydantic-core 94 | # selenium 95 | undetected-chromedriver==3.5.5 96 | uritemplate==4.1.1 97 | # via 
import logging

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application configuration.

    Values are resolved by pydantic-settings from process environment
    variables and the local ``.env`` file (field name == env var name,
    case-insensitive).
    """

    model_config = SettingsConfigDict(
        extra="allow",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # Logger config
    log_level: str = "INFO"

    # Source settings.
    # Resolved from HASHES_FILE_PATH by pydantic-settings itself; the
    # previous `os.getenv()` default was evaluated once at class-definition
    # time and never consulted the `.env` file.
    hashes_file_path: str | None = None

    # Parse settings
    main_url: str = "https://solscan.io/account/"

    # Google sheets settings
    sheet_scopes: list[str] = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    sheet_title: str = "[RESULT] SOLSCAN parse"
    sheet_first_list_name: str = "Result"
    sheet_range: str = f"{sheet_first_list_name}!A2:F"
    sheet_headers: list[str] = [
        "Date",
        "Hash",
        "SOL count",
        "SOL usd",
        "SPL count",
        "SPL usd",
    ]


settings = Settings()

# Configure logging with the *effective* level: the previous in-class call
# to logging.basicConfig() always used the class default ("INFO") because
# environment overrides are only applied when the instance is created.
logging.basicConfig(level=settings.log_level)

if settings.log_level != "DEBUG":
    # Quiet chatty third-party loggers unless explicitly debugging.
    logging.getLogger("googleapiclient").setLevel(logging.WARNING)
    logging.getLogger("undetected_chromedriver").setLevel(logging.WARNING)
def read_hashes_from_file(file_path: str) -> list[str]:
    """Load transaction hashes from *file_path*, one per line.

    Lines that are blank or begin with ``#`` are skipped; any single or
    double quote characters are removed from the remaining entries before
    surrounding whitespace is stripped.
    """
    try:
        with open(file_path, "r") as source:
            hashes: list[str] = []
            for raw_line in source:
                # Skip blanks and comment lines (``#`` must be in column 0,
                # matching the behaviour of the original raw-line check).
                if not raw_line.strip() or raw_line.startswith("#"):
                    continue
                cleaned = raw_line.replace('"', "").replace("'", "").strip()
                hashes.append(cleaned)
            return hashes
    except Exception as e:
        logger.exception(
            "An error occurred while reading hashes from the file."
        )
        raise e
def get_cookies(url: str = COOKIES_URL):
    """Open *url* in a real Chrome window and capture the Cloudflare
    ``cf_clearance`` cookie into the module-level COOKIES dict.

    The module-level UPDATED flag guards against launching a second
    browser while a refresh is already in progress.
    """
    if UPDATED["status"]:
        return
    UPDATED["status"] = True

    options = uc.ChromeOptions()
    options.add_argument("--disable-popup-blocking")
    # options.add_argument("--headless")

    browser = uc.Chrome(use_subprocess=True, options=options)
    browser.get(url)
    # Opening the same URL in a second tab helps get past the Cloudflare
    # "Just a moment..." interstitial.
    browser.execute_script(f"window.open('{url}', '_blank')")
    time.sleep(10)

    for cookie in browser.get_cookies():
        if cookie.get("name") == "cf_clearance":
            COOKIES["cf_clearance"] = cookie.get("value")
    # NOTE(review): the browser is intentionally left running, matching the
    # original behaviour — confirm whether it should be quit here.
from datetime import datetime

from pydantic import BaseModel, Field


class SolscanResult(BaseModel):
    """One parsed Solscan account row, ready for the Google Sheet.

    String fields default to "-" so missing values render as a dash in
    the spreadsheet.
    """

    # default_factory gives every instance a fresh timestamp; the previous
    # `date: datetime = datetime.now()` default was evaluated once at import
    # time and shared by all instances created afterwards.
    date: datetime = Field(default_factory=datetime.now)
    hash: str = "-"
    sol_count: str = "-"
    sol_usd: str = "-"
    spl_count: str = "-"
    spl_usd: str = "-"
    def __init__(self):
        # Cached OAuth token (written after a successful auth flow) and the
        # client-secrets file downloaded from the Google Cloud console.
        self.token_path = "token.json"
        self.credentials_path = "credentials.json"

        self.scopes = settings.sheet_scopes
        self.title = settings.sheet_title
        self.range = settings.sheet_range
        self.list = settings.sheet_first_list_name
        self.headers = settings.sheet_headers
        # May start an interactive browser-based auth flow on first run.
        self.creds = self.initialize_credentials()

    def initialize_credentials(self) -> Credentials | FlowCredentials:
        """Return usable Google credentials.

        Tries the cached token file first; falls back to refreshing or to a
        full interactive authentication flow when the token is missing,
        unreadable, or no longer valid.
        """
        logger.info("Initializing Google Sheets credentials.")

        creds = None
        if os.path.exists(self.token_path):
            try:
                creds = Credentials.from_authorized_user_file(
                    self.token_path, self.scopes
                )
                logger.info("Loaded credentials from token file.")
            except Exception as e:
                # A corrupt or incompatible token file is not fatal: creds
                # stays None and we re-authenticate below.
                logger.error(
                    f"Failed to load credentials from token file: {e}"
                )
        if not creds or not creds.valid:
            creds = self.refresh_credentials(creds)

        return creds

    def refresh_credentials(
        self, creds: Credentials | FlowCredentials | None
    ) -> Credentials | FlowCredentials:
        """Refresh expired credentials, or run the full authentication flow
        when no refreshable credentials exist; persists the result to disk.
        """
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                logger.info("Credentials refreshed successfully.")
            except Exception as e:
                # Refresh token rejected/expired — fall back to a full flow.
                logger.error(f"Failed to refresh credentials: {e}")
                creds = self.run_authentication_flow()
        else:
            creds = self.run_authentication_flow()
        self.save_credentials(creds)
        return creds
    def save_credentials(self, creds: Credentials | FlowCredentials):
        """Persist *creds* to the token file for reuse on the next run."""
        logger.info("Saving credentials to file.")
        try:
            with open(self.token_path, "w") as token_file:
                token_file.write(creds.to_json())
            logger.info("Credentials saved successfully.")
        except IOError as e:
            # Best effort: a failed save only means re-authenticating later.
            logger.error(f"Unable to save credentials: {e}")

    def manage_spreadsheet(self, data: SolscanResult) -> bool:
        """Append one parsed result row to the target spreadsheet.

        Locates (or creates) the spreadsheet by title, then appends *data*.
        Returns True on success, False on Google API / auth errors.
        """
        try:
            drive_service = build("drive", "v3", credentials=self.creds)
            sheet_service = build("sheets", "v4", credentials=self.creds)
            spreadsheet_id = self.find_or_create_spreadsheet(
                drive_service, sheet_service
            )
            if spreadsheet_id:
                logger.info(f"Try to add data: {data}")
                self.update_sheet(sheet_service, spreadsheet_id, data)
                return True
        except (HttpError, GoogleAuthError) as err:
            logger.error(f"Error managing spreadsheet: {err}")
            return False

    def find_or_create_spreadsheet(self, drive_service, sheet_service) -> str:
        """Return the id of the spreadsheet named ``self.title``, creating
        it (with headers) when no matching file exists in Drive."""
        logger.debug("Finding or creating spreadsheet.")
        try:
            response = (
                drive_service.files()
                .list(
                    q=f"name='{self.title}' and mimeType='application/vnd.google-apps.spreadsheet'",
                    spaces="drive",
                    fields="files(id, name)",
                )
                .execute()
            )
            files = response.get("files", [])
            if files:
                # Several files may match the title; the first one wins.
                logger.info("Spreadsheet found.")
                return files[0].get("id")
            else:
                logger.info("Spreadsheet not found, creating a new one.")
                return self.create_spreadsheet(sheet_service)
        except Exception as e:
            logger.error(f"Error finding or creating spreadsheet: {e}")
            raise e
    def setup_sheet(
        self,
        sheet_service,
        spreadsheet_id: str,
    ):
        """Rename the default sheet of a freshly created spreadsheet to
        ``self.list`` and write the header row into A1."""
        logger.info("Setting up the spreadsheet.")
        try:
            # Id of the default sheet created alongside the spreadsheet.
            sheet_id = (
                sheet_service.spreadsheets()
                .get(spreadsheetId=spreadsheet_id)
                .execute()["sheets"][0]["properties"]["sheetId"]
            )
            batch_update_spreadsheet_request_body = {
                "requests": [
                    {
                        "updateSheetProperties": {
                            "properties": {
                                "sheetId": sheet_id,
                                "title": self.list,
                            },
                            # Only the title property is modified.
                            "fields": "title",
                        }
                    }
                ]
            }
            sheet_service.spreadsheets().batchUpdate(
                spreadsheetId=spreadsheet_id,
                body=batch_update_spreadsheet_request_body,
            ).execute()
            # Header row goes into A1 of the renamed sheet.
            sheet_service.spreadsheets().values().update(
                spreadsheetId=spreadsheet_id,
                range=f"{self.list}!A1",
                body={"values": [self.headers]},
                valueInputOption="USER_ENTERED",
            ).execute()
            logger.info("Spreadsheet setup completed.")
        except Exception as e:
            logger.error(f"Error setting up spreadsheet: {e}")
            raise e
| range=self.range, 204 | valueInputOption="USER_ENTERED", 205 | body=body, 206 | ).execute() 207 | logger.debug("Data appended to the spreadsheet successfully.") 208 | except Exception as e: 209 | logger.error(f"Error updating the spreadsheet: {e}") 210 | raise e 211 | -------------------------------------------------------------------------------- /src/services/solscan_parser.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | from urllib.parse import urljoin 4 | 5 | import undetected_chromedriver as uc 6 | from selenium.common.exceptions import TimeoutException 7 | from selenium.webdriver.common.by import By 8 | from selenium.webdriver.remote.webelement import WebElement 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.support.wait import WebDriverWait 11 | 12 | from core.config import settings 13 | from core.logger import get_logger 14 | from schemas.parse_schemas import SolscanResult 15 | from services.google_sheets import GoogleSheets 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | class SolscanParser: 21 | def __init__(self, hashes: list[str]): 22 | self.google_sheets = GoogleSheets() 23 | self.url = settings.main_url 24 | self.hashes = hashes 25 | self.driver = None 26 | 27 | def __enter__(self): 28 | logger.info("Entering to the browser...") 29 | 30 | chrome_options = uc.ChromeOptions() 31 | chrome_options.add_argument("--disable-popup-blocking") 32 | chrome_options.add_argument("--headless") 33 | 34 | self.driver = uc.Chrome(use_subprocess=True, options=chrome_options) 35 | 36 | return self 37 | 38 | def __exit__(self, exc_type, exc_value, _): 39 | logger.info("Exiting from the browser...") 40 | if exc_type: 41 | logger.error(f"An exception occurred: {exc_value}") 42 | 43 | if self.driver: 44 | self.driver.close() 45 | 46 | return False 47 | 48 | def proccess_sol( 49 | self, result: SolscanResult, elem: WebElement 
    def proccess_spl(
        self, result: SolscanResult, elem: WebElement
    ) -> SolscanResult:
        """Fill result.spl_count / result.spl_usd from the token-balance
        element's text.

        Assumes the element text has the form
        ``"<count> ...\n(<$usd>)"`` — count on the first line, a
        parenthesised dollar value on the second — TODO confirm against the
        live page markup.
        """
        logger.debug(f"Trying process SPL for {result.hash}: {elem.text}")
        if elem:
            row_text = elem.text
            spl_count, spl_usd = row_text.split("\n")
            spl_count = spl_count.split(" ")[0]
            # Strip the "($...)" decoration, leaving the bare number.
            spl_usd = (
                spl_usd.replace("(", "").replace(")", "").replace("$", "")
            )

            result.spl_count = spl_count
            result.spl_usd = spl_usd

        return result

    def parse_sol_values(self, result: SolscanResult, driver) -> SolscanResult:
        """Locate the 'SOL Balance' element (waiting up to 5s) and extract
        the SOL amount into *result*; raises TimeoutException on absence."""
        logger.debug(f"Trying parse SOL for {result.hash}")
        sol_balance_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    # The balance value lives in the div following the label.
                    ("//div[text()='SOL Balance']/following::div"),
                )
            )
        )
        result = self.proccess_sol(result, sol_balance_element)

        return result

    def parse_spl_values(self, result: SolscanResult, driver) -> SolscanResult:
        """Locate the 'Token Balance' element (waiting up to 5s) and extract
        SPL count/USD into *result*; raises TimeoutException on absence."""
        logger.debug(f"Trying parse SPL for {result.hash}")
        spl_balance_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    ("//div[text()='Token Balance']/following::div"),
                )
            )
        )
        result = self.proccess_spl(result, spl_balance_element)

        return result

    def fix_cf_just_moment(self, url: str, driver):
        # Fix CF `Just moment...` loading: open the URL in a second tab,
        # give Cloudflare 3s to settle, then close that helper tab.  The
        # caller is responsible for switching back to the original window.
        driver.execute_script(f"window.open('{url}', '_blank')")
        driver.switch_to.window(driver.window_handles[1])
        time.sleep(3)
        driver.close()
logger.critical(msg) 115 | raise ValueError(msg) 116 | 117 | result = SolscanResult(date=datetime.now(), hash=hash) 118 | url = urljoin(self.url, hash) 119 | 120 | logger.info(f"Try parse: {result.hash}") 121 | 122 | driver = self.driver 123 | driver.get(url) 124 | 125 | self.fix_cf_just_moment(url, driver) 126 | driver.switch_to.window(driver.window_handles[0]) 127 | 128 | try: 129 | result = self.parse_sol_values(result, driver) 130 | result = self.parse_spl_values(result, driver) 131 | except TimeoutException: 132 | logger.info( 133 | f"Can't found SQL Balance or Token values: {result.hash}" 134 | ) 135 | pass 136 | 137 | return result 138 | 139 | def process_hashes(self): 140 | for hash in self.hashes: 141 | parse_result = self.get_parse_result(hash) 142 | self.google_sheets.manage_spreadsheet(parse_result) 143 | --------------------------------------------------------------------------------