├── .env.example ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── hashes.txt ├── pyproject.toml ├── requirements.txt └── src ├── core ├── config.py └── logger.py ├── main.py ├── main_speed.py ├── schemas └── parse_schemas.py └── services ├── google_sheets.py └── solscan_parser.py /.env.example: -------------------------------------------------------------------------------- 1 | PYTHONPATH=src 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | token.json 162 | credentials.json 163 | .python-version 164 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-added-large-files 9 | exclude: '\.lock.*' 10 | - id: debug-statements 11 | 12 | - repo: https://github.com/astral-sh/uv-pre-commit 13 | rev: 0.1.27 14 | hooks: 15 | - id: pip-compile 16 | name: '[BUILD] Pip-compile' 17 | args: [requirements.txt, -o, requirements.txt] 18 | 19 | - repo: https://github.com/astral-sh/ruff-pre-commit 20 | rev: v0.3.4 21 | hooks: 22 | - id: ruff 23 | name: '[BE] Ruff' 24 | args: ['--fix', '--unsafe-fixes', '--exit-non-zero-on-fix'] 25 | 26 | - repo: https://github.com/psf/black 27 | rev: 24.3.0 28 | hooks: 29 | - id: black 30 | name: '[BE] Black formatter' 31 | 32 | - repo: https://github.com/pycqa/isort 33 | rev: 5.13.2 34 | hooks: 35 | - id: isort 36 | name: '[BE] Isort imports' 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Alexandr Mudrak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple Selenium Parser for Solscan with Google Sheets Integration 2 | 3 | This project is a Selenium-based scraper that collects data from Solscan and saves it to a Google Sheet. 4 | 5 | ## Prerequisites 6 | 7 | - Python 3.11+ 8 | 9 | ## Installation 10 | 11 | First, clone the repository to your local machine: 12 | 13 | ```bash 14 | git clone https://github.com/alexmudrak/solscan-parser.git 15 | cd solscan-parser 16 | ``` 17 | 18 | Install the dependencies: 19 | 20 | ```bash 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | ## Configuration 25 | 26 | Before running the application, you need to enable the Google Sheets API and download the `credentials.json` file: 27 | 28 | 1. Enable the Google Sheets API by visiting: 29 | [Enable Google Sheets API](https://console.cloud.google.com/apis/enableflow?apiid=sheets.googleapis.com) 30 | 31 | 2. Download the `credentials.json` file from: 32 | [Google API Credentials](https://console.cloud.google.com/apis/credentials) 33 | 34 | Place the `credentials.json` file in the root directory of the project. 
35 | 36 | ## Running the Application 37 | 38 | To run the application, use the following command: 39 | 40 | ```bash 41 | python src/main.py 42 | ``` 43 | 44 | ## Usage 45 | 46 | The application will navigate to the Solscan website, extract the required data, and then authenticate with Google to access and update the specified Google Sheet with the transaction data. 47 | -------------------------------------------------------------------------------- /hashes.txt: -------------------------------------------------------------------------------- 1 | # Add a new hash on a new line. Any leading or 2 | # trailing quotation marks will be removed 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pyright] 2 | executionEnvironments = [ 3 | { root = "./", extraPaths = [ "src" ] } 4 | ] 5 | 6 | [tool.black] 7 | line-length = 79 8 | 9 | [tool.isort] 10 | line_length = 79 11 | profile = "black" 12 | 13 | [tool.ruff] 14 | line-length = 79 15 | lint.fixable = ["ALL"] 16 | 17 | [tool.coverage.report] 18 | exclude_lines= [ 19 | "pragma: no cover", 20 | "def __repr__", 21 | "if __name__ == .__main__.:", 22 | ] 23 | 24 | omit = [ 25 | "*/__init__.py", 26 | ] 27 | 28 | [tool.coverage.run] 29 | source = ["src/*"] 30 | 31 | omit = [ 32 | "*/__init__.py", 33 | ] 34 | 35 | [tool.pytest.ini_options] 36 | pythonpath = "src" 37 | python_files = [ 38 | "tests.py", 39 | "test_*.py", 40 | "*_tests.py", 41 | ] 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile requirements.txt -o requirements.txt 3 | annotated-types==0.7.0 4 | # via pydantic 5 | attrs==25.1.0 6 | # via 7 | # outcome 8 | # trio 9 | cachetools==5.5.1 10 | # via google-auth 
11 | certifi==2024.12.14 12 | # via 13 | # requests 14 | # selenium 15 | charset-normalizer==3.4.1 16 | # via requests 17 | google-api-core==2.24.0 18 | # via google-api-python-client 19 | google-api-python-client==2.159.0 20 | google-auth==2.38.0 21 | # via 22 | # google-api-core 23 | # google-api-python-client 24 | # google-auth-httplib2 25 | # google-auth-oauthlib 26 | google-auth-httplib2==0.2.0 27 | # via google-api-python-client 28 | google-auth-oauthlib==1.2.1 29 | googleapis-common-protos==1.66.0 30 | # via google-api-core 31 | h11==0.14.0 32 | # via wsproto 33 | httplib2==0.22.0 34 | # via 35 | # google-api-python-client 36 | # google-auth-httplib2 37 | idna==3.10 38 | # via 39 | # requests 40 | # trio 41 | oauthlib==3.2.2 42 | # via requests-oauthlib 43 | outcome==1.3.0.post0 44 | # via trio 45 | proto-plus==1.25.0 46 | # via google-api-core 47 | protobuf==5.29.3 48 | # via 49 | # google-api-core 50 | # googleapis-common-protos 51 | # proto-plus 52 | pyasn1==0.6.1 53 | # via 54 | # pyasn1-modules 55 | # rsa 56 | pyasn1-modules==0.4.1 57 | # via google-auth 58 | pydantic==2.10.6 59 | # via pydantic-settings 60 | pydantic-core==2.27.2 61 | # via pydantic 62 | pydantic-settings==2.7.1 63 | pyparsing==3.2.1 64 | # via httplib2 65 | pysocks==1.7.1 66 | # via urllib3 67 | python-dotenv==1.0.1 68 | # via pydantic-settings 69 | requests==2.32.3 70 | # via 71 | # google-api-core 72 | # requests-oauthlib 73 | # undetected-chromedriver 74 | requests-oauthlib==2.0.0 75 | # via google-auth-oauthlib 76 | rsa==4.9 77 | # via google-auth 78 | selenium==4.28.1 79 | # via undetected-chromedriver 80 | sniffio==1.3.1 81 | # via trio 82 | sortedcontainers==2.4.0 83 | # via trio 84 | trio==0.28.0 85 | # via 86 | # selenium 87 | # trio-websocket 88 | trio-websocket==0.11.1 89 | # via selenium 90 | typing-extensions==4.12.2 91 | # via 92 | # pydantic 93 | # pydantic-core 94 | # selenium 95 | undetected-chromedriver==3.5.5 96 | uritemplate==4.1.1 97 | # via 
import logging

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application configuration.

    Values are resolved by pydantic-settings from process environment
    variables and the local ``.env`` file (field name == env var name,
    case-insensitive).
    """

    model_config = SettingsConfigDict(
        extra="allow",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # Logger config
    log_level: str = "INFO"

    # Source settings.
    # Resolved from HASHES_FILE_PATH by pydantic-settings itself; the
    # previous `os.getenv()` default was evaluated once at class-definition
    # time and never consulted the `.env` file.
    hashes_file_path: str | None = None

    # Parse settings
    main_url: str = "https://solscan.io/account/"

    # Google sheets settings
    sheet_scopes: list[str] = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    sheet_title: str = "[RESULT] SOLSCAN parse"
    sheet_first_list_name: str = "Result"
    sheet_range: str = f"{sheet_first_list_name}!A2:F"
    sheet_headers: list[str] = [
        "Date",
        "Hash",
        "SOL count",
        "SOL usd",
        "SPL count",
        "SPL usd",
    ]


settings = Settings()

# Configure logging with the *effective* level: the previous in-class call
# to logging.basicConfig() always used the class default ("INFO") because
# environment overrides are only applied when the instance is created.
logging.basicConfig(level=settings.log_level)

if settings.log_level != "DEBUG":
    # Quiet chatty third-party loggers unless explicitly debugging.
    logging.getLogger("googleapiclient").setLevel(logging.WARNING)
    logging.getLogger("undetected_chromedriver").setLevel(logging.WARNING)
def read_hashes_from_file(file_path: str) -> list[str]:
    """Load transaction hashes from *file_path*, one per line.

    Lines that are blank or begin with ``#`` are skipped; any single or
    double quote characters are removed from the remaining entries before
    surrounding whitespace is stripped.
    """
    try:
        with open(file_path, "r") as source:
            hashes: list[str] = []
            for raw_line in source:
                # Skip blanks and comment lines (``#`` must be in column 0,
                # matching the behaviour of the original raw-line check).
                if not raw_line.strip() or raw_line.startswith("#"):
                    continue
                cleaned = raw_line.replace('"', "").replace("'", "").strip()
                hashes.append(cleaned)
            return hashes
    except Exception as e:
        logger.exception(
            "An error occurred while reading hashes from the file."
        )
        raise e
def get_cookies(url: str = COOKIES_URL):
    """Open *url* in a real Chrome window and capture the Cloudflare
    ``cf_clearance`` cookie into the module-level COOKIES dict.

    The module-level UPDATED flag guards against launching a second
    browser while a refresh is already in progress.
    """
    if UPDATED["status"]:
        return
    UPDATED["status"] = True

    options = uc.ChromeOptions()
    options.add_argument("--disable-popup-blocking")
    # options.add_argument("--headless")

    browser = uc.Chrome(use_subprocess=True, options=options)
    browser.get(url)
    # Opening the same URL in a second tab helps get past the Cloudflare
    # "Just a moment..." interstitial.
    browser.execute_script(f"window.open('{url}', '_blank')")
    time.sleep(10)

    for cookie in browser.get_cookies():
        if cookie.get("name") == "cf_clearance":
            COOKIES["cf_clearance"] = cookie.get("value")
    # NOTE(review): the browser is intentionally left running, matching the
    # original behaviour — confirm whether it should be quit here.
from datetime import datetime

from pydantic import BaseModel, Field


class SolscanResult(BaseModel):
    """One parsed Solscan account row, ready for the Google Sheet.

    String fields default to "-" so missing values render as a dash in
    the spreadsheet.
    """

    # default_factory gives every instance a fresh timestamp; the previous
    # `date: datetime = datetime.now()` default was evaluated once at import
    # time and shared by all instances created afterwards.
    date: datetime = Field(default_factory=datetime.now)
    hash: str = "-"
    sol_count: str = "-"
    sol_usd: str = "-"
    spl_count: str = "-"
    spl_usd: str = "-"
    def __init__(self):
        # Cached OAuth token (written after a successful auth flow) and the
        # client-secrets file downloaded from the Google Cloud console.
        self.token_path = "token.json"
        self.credentials_path = "credentials.json"

        self.scopes = settings.sheet_scopes
        self.title = settings.sheet_title
        self.range = settings.sheet_range
        self.list = settings.sheet_first_list_name
        self.headers = settings.sheet_headers
        # May start an interactive browser-based auth flow on first run.
        self.creds = self.initialize_credentials()

    def initialize_credentials(self) -> Credentials | FlowCredentials:
        """Return usable Google credentials.

        Tries the cached token file first; falls back to refreshing or to a
        full interactive authentication flow when the token is missing,
        unreadable, or no longer valid.
        """
        logger.info("Initializing Google Sheets credentials.")

        creds = None
        if os.path.exists(self.token_path):
            try:
                creds = Credentials.from_authorized_user_file(
                    self.token_path, self.scopes
                )
                logger.info("Loaded credentials from token file.")
            except Exception as e:
                # A corrupt or incompatible token file is not fatal: creds
                # stays None and we re-authenticate below.
                logger.error(
                    f"Failed to load credentials from token file: {e}"
                )
        if not creds or not creds.valid:
            creds = self.refresh_credentials(creds)

        return creds

    def refresh_credentials(
        self, creds: Credentials | FlowCredentials | None
    ) -> Credentials | FlowCredentials:
        """Refresh expired credentials, or run the full authentication flow
        when no refreshable credentials exist; persists the result to disk.
        """
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                logger.info("Credentials refreshed successfully.")
            except Exception as e:
                # Refresh token rejected/expired — fall back to a full flow.
                logger.error(f"Failed to refresh credentials: {e}")
                creds = self.run_authentication_flow()
        else:
            creds = self.run_authentication_flow()
        self.save_credentials(creds)
        return creds
    def save_credentials(self, creds: Credentials | FlowCredentials):
        """Persist *creds* to the token file for reuse on the next run."""
        logger.info("Saving credentials to file.")
        try:
            with open(self.token_path, "w") as token_file:
                token_file.write(creds.to_json())
            logger.info("Credentials saved successfully.")
        except IOError as e:
            # Best effort: a failed save only means re-authenticating later.
            logger.error(f"Unable to save credentials: {e}")

    def manage_spreadsheet(self, data: SolscanResult) -> bool:
        """Append one parsed result row to the target spreadsheet.

        Locates (or creates) the spreadsheet by title, then appends *data*.
        Returns True on success, False on Google API / auth errors.
        """
        try:
            drive_service = build("drive", "v3", credentials=self.creds)
            sheet_service = build("sheets", "v4", credentials=self.creds)
            spreadsheet_id = self.find_or_create_spreadsheet(
                drive_service, sheet_service
            )
            if spreadsheet_id:
                logger.info(f"Try to add data: {data}")
                self.update_sheet(sheet_service, spreadsheet_id, data)
                return True
        except (HttpError, GoogleAuthError) as err:
            logger.error(f"Error managing spreadsheet: {err}")
            return False

    def find_or_create_spreadsheet(self, drive_service, sheet_service) -> str:
        """Return the id of the spreadsheet named ``self.title``, creating
        it (with headers) when no matching file exists in Drive."""
        logger.debug("Finding or creating spreadsheet.")
        try:
            response = (
                drive_service.files()
                .list(
                    q=f"name='{self.title}' and mimeType='application/vnd.google-apps.spreadsheet'",
                    spaces="drive",
                    fields="files(id, name)",
                )
                .execute()
            )
            files = response.get("files", [])
            if files:
                # Several files may match the title; the first one wins.
                logger.info("Spreadsheet found.")
                return files[0].get("id")
            else:
                logger.info("Spreadsheet not found, creating a new one.")
                return self.create_spreadsheet(sheet_service)
        except Exception as e:
            logger.error(f"Error finding or creating spreadsheet: {e}")
            raise e
    def setup_sheet(
        self,
        sheet_service,
        spreadsheet_id: str,
    ):
        """Rename the default sheet of a freshly created spreadsheet to
        ``self.list`` and write the header row into A1."""
        logger.info("Setting up the spreadsheet.")
        try:
            # Id of the default sheet created alongside the spreadsheet.
            sheet_id = (
                sheet_service.spreadsheets()
                .get(spreadsheetId=spreadsheet_id)
                .execute()["sheets"][0]["properties"]["sheetId"]
            )
            batch_update_spreadsheet_request_body = {
                "requests": [
                    {
                        "updateSheetProperties": {
                            "properties": {
                                "sheetId": sheet_id,
                                "title": self.list,
                            },
                            # Only the title property is modified.
                            "fields": "title",
                        }
                    }
                ]
            }
            sheet_service.spreadsheets().batchUpdate(
                spreadsheetId=spreadsheet_id,
                body=batch_update_spreadsheet_request_body,
            ).execute()
            # Header row goes into A1 of the renamed sheet.
            sheet_service.spreadsheets().values().update(
                spreadsheetId=spreadsheet_id,
                range=f"{self.list}!A1",
                body={"values": [self.headers]},
                valueInputOption="USER_ENTERED",
            ).execute()
            logger.info("Spreadsheet setup completed.")
        except Exception as e:
            logger.error(f"Error setting up spreadsheet: {e}")
            raise e
| range=self.range, 204 | valueInputOption="USER_ENTERED", 205 | body=body, 206 | ).execute() 207 | logger.debug("Data appended to the spreadsheet successfully.") 208 | except Exception as e: 209 | logger.error(f"Error updating the spreadsheet: {e}") 210 | raise e 211 | -------------------------------------------------------------------------------- /src/services/solscan_parser.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | from urllib.parse import urljoin 4 | 5 | import undetected_chromedriver as uc 6 | from selenium.common.exceptions import TimeoutException 7 | from selenium.webdriver.common.by import By 8 | from selenium.webdriver.remote.webelement import WebElement 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.support.wait import WebDriverWait 11 | 12 | from core.config import settings 13 | from core.logger import get_logger 14 | from schemas.parse_schemas import SolscanResult 15 | from services.google_sheets import GoogleSheets 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | class SolscanParser: 21 | def __init__(self, hashes: list[str]): 22 | self.google_sheets = GoogleSheets() 23 | self.url = settings.main_url 24 | self.hashes = hashes 25 | self.driver = None 26 | 27 | def __enter__(self): 28 | logger.info("Entering to the browser...") 29 | 30 | chrome_options = uc.ChromeOptions() 31 | chrome_options.add_argument("--disable-popup-blocking") 32 | chrome_options.add_argument("--headless") 33 | 34 | self.driver = uc.Chrome(use_subprocess=True, options=chrome_options) 35 | 36 | return self 37 | 38 | def __exit__(self, exc_type, exc_value, _): 39 | logger.info("Exiting from the browser...") 40 | if exc_type: 41 | logger.error(f"An exception occurred: {exc_value}") 42 | 43 | if self.driver: 44 | self.driver.close() 45 | 46 | return False 47 | 48 | def proccess_sol( 49 | self, result: SolscanResult, elem: WebElement 
    def proccess_spl(
        self, result: SolscanResult, elem: WebElement
    ) -> SolscanResult:
        """Fill result.spl_count / result.spl_usd from the token-balance
        element's text.

        Assumes the element text has the form
        ``"<count> ...\n(<$usd>)"`` — count on the first line, a
        parenthesised dollar value on the second — TODO confirm against the
        live page markup.
        """
        logger.debug(f"Trying process SPL for {result.hash}: {elem.text}")
        if elem:
            row_text = elem.text
            spl_count, spl_usd = row_text.split("\n")
            spl_count = spl_count.split(" ")[0]
            # Strip the "($...)" decoration, leaving the bare number.
            spl_usd = (
                spl_usd.replace("(", "").replace(")", "").replace("$", "")
            )

            result.spl_count = spl_count
            result.spl_usd = spl_usd

        return result

    def parse_sol_values(self, result: SolscanResult, driver) -> SolscanResult:
        """Locate the 'SOL Balance' element (waiting up to 5s) and extract
        the SOL amount into *result*; raises TimeoutException on absence."""
        logger.debug(f"Trying parse SOL for {result.hash}")
        sol_balance_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    # The balance value lives in the div following the label.
                    ("//div[text()='SOL Balance']/following::div"),
                )
            )
        )
        result = self.proccess_sol(result, sol_balance_element)

        return result

    def parse_spl_values(self, result: SolscanResult, driver) -> SolscanResult:
        """Locate the 'Token Balance' element (waiting up to 5s) and extract
        SPL count/USD into *result*; raises TimeoutException on absence."""
        logger.debug(f"Trying parse SPL for {result.hash}")
        spl_balance_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    ("//div[text()='Token Balance']/following::div"),
                )
            )
        )
        result = self.proccess_spl(result, spl_balance_element)

        return result

    def fix_cf_just_moment(self, url: str, driver):
        # Fix CF `Just moment...` loading: open the URL in a second tab,
        # give Cloudflare 3s to settle, then close that helper tab.  The
        # caller is responsible for switching back to the original window.
        driver.execute_script(f"window.open('{url}', '_blank')")
        driver.switch_to.window(driver.window_handles[1])
        time.sleep(3)
        driver.close()
logger.critical(msg) 115 | raise ValueError(msg) 116 | 117 | result = SolscanResult(date=datetime.now(), hash=hash) 118 | url = urljoin(self.url, hash) 119 | 120 | logger.info(f"Try parse: {result.hash}") 121 | 122 | driver = self.driver 123 | driver.get(url) 124 | 125 | self.fix_cf_just_moment(url, driver) 126 | driver.switch_to.window(driver.window_handles[0]) 127 | 128 | try: 129 | result = self.parse_sol_values(result, driver) 130 | result = self.parse_spl_values(result, driver) 131 | except TimeoutException: 132 | logger.info( 133 | f"Can't found SQL Balance or Token values: {result.hash}" 134 | ) 135 | pass 136 | 137 | return result 138 | 139 | def process_hashes(self): 140 | for hash in self.hashes: 141 | parse_result = self.get_parse_result(hash) 142 | self.google_sheets.manage_spreadsheet(parse_result) 143 | --------------------------------------------------------------------------------