├── .gitignore ├── poetry.lock ├── pyproject.toml ├── test.py ├── tests └── __init__.py └── ultima_scraper_collection ├── __init__.py ├── config.py ├── helpers ├── __init__.py └── main_helper.py ├── managers ├── __init__.py ├── aio_pika_wrapper.py ├── content_manager.py ├── database_manager │ ├── __init__.py │ ├── connections │ │ ├── __init__.py │ │ └── sqlite │ │ │ ├── __init__.py │ │ │ ├── databases │ │ │ └── user_data │ │ │ │ └── migration │ │ │ │ ├── __init__.py │ │ │ │ ├── alembic.ini │ │ │ │ ├── alembic │ │ │ │ ├── README │ │ │ │ ├── env.py │ │ │ │ ├── script.py.mako │ │ │ │ └── versions │ │ │ │ │ ├── 0d4d92c0498e_content.py │ │ │ │ │ ├── 1454e4d1c6b8_content.py │ │ │ │ │ ├── 37c4f2719d65_content.py │ │ │ │ │ ├── 5493253cc03c_content.py │ │ │ │ │ ├── b791cf213df9_content.py │ │ │ │ │ └── d2f2002f3c36_content.py │ │ │ │ └── base_user_database.db │ │ │ ├── legacy_databases │ │ │ ├── __init__.py │ │ │ ├── messages │ │ │ │ ├── __init__.py │ │ │ │ └── migration │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── alembic.ini │ │ │ │ │ ├── alembic │ │ │ │ │ ├── env.py │ │ │ │ │ ├── script.py.mako │ │ │ │ │ └── versions │ │ │ │ │ │ ├── 2c36fcc0b921_content.py │ │ │ │ │ │ ├── 7c1c6e101059_content.py │ │ │ │ │ │ ├── aeb9fe314556_content.py │ │ │ │ │ │ ├── bf20242a238f_content.py │ │ │ │ │ │ └── d0118d8ec0b4_content.py │ │ │ │ │ ├── messages.py │ │ │ │ │ └── test_messages.db │ │ │ ├── posts │ │ │ │ └── migration │ │ │ │ │ ├── alembic.ini │ │ │ │ │ ├── alembic │ │ │ │ │ ├── env.py │ │ │ │ │ ├── script.py.mako │ │ │ │ │ └── versions │ │ │ │ │ │ ├── 194e05269f09_content.py │ │ │ │ │ │ ├── 5b4bea08c27f_content.py │ │ │ │ │ │ ├── 6b1b10eb67de_content.py │ │ │ │ │ │ ├── 990fc1108317_content.py │ │ │ │ │ │ └── a918b6b05d2f_content.py │ │ │ │ │ ├── posts.py │ │ │ │ │ └── test_posts.db │ │ │ └── stories │ │ │ │ └── migration │ │ │ │ ├── alembic.ini │ │ │ │ ├── alembic │ │ │ │ ├── env.py │ │ │ │ ├── script.py.mako │ │ │ │ └── versions │ │ │ │ │ ├── 29f675c35eee_content.py │ │ │ │ │ ├── 2e4f8364f7e2_content.py │ │ │ │ │ ├── 3076beb33c1b_content.py │ │ │ │ │ ├── e0c73f066547_content.py │ │ │ │ │ └── ebc3f4bb0782_content.py │ │ │ │ ├── stories.py │ │ │ │ └── test_stories.db │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── api_model.py │ │ │ ├── media_model.py │ │ │ └── user_database.py │ │ │ └── sqlite_database.py │ └── database_manager.py ├── datascraper_manager │ ├── __init__.py │ ├── datascraper_manager.py │ └── datascrapers │ │ ├── __init__.py │ │ ├── fansly.py │ │ └── onlyfans.py ├── download_manager.py ├── filesystem_manager.py ├── metadata_manager │ ├── __init__.py │ └── metadata_manager.py ├── option_manager.py └── server_manager.py ├── modules ├── __init__.py └── module_streamliner.py ├── projects ├── __init__.py ├── project_manager.py └── ultima_archive.py └── py.typed /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io/api/code,linux,python,pycharm,windows 2 | # Edit at https://www.gitignore.io/?templates=code,linux,python,pycharm,windows 3 | 4 | ### Code ### 5 | .vscode/* 6 | 7 | ### PyCharm Patch ### 8 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 9 | .idea/* 10 | 11 | ### Python ### 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | 
sdist/ 32 | var/ 33 | wheels/ 34 | pip-wheel-metadata/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | ### venv ### 42 | # Virtualenv 43 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 44 | .Python 45 | [Bb]in 46 | [Ii]nclude 47 | [Ll]ib 48 | [Ll]ib64 49 | [Ll]ocal 50 | [Ss]cripts 51 | pyvenv.cfg 52 | .env 53 | .venv 54 | env/ 55 | venv/ 56 | ENV/ 57 | env.bak/ 58 | venv.bak/ 59 | pip-selfcheck.json 60 | 61 | 62 | # PyInstaller 63 | # Usually these files are written by a python script from a template 64 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 65 | *.manifest 66 | *.spec 67 | 68 | # Installer logs 69 | pip-log.txt 70 | pip-delete-this-directory.txt 71 | 72 | # Unit test / coverage reports 73 | htmlcov/ 74 | .tox/ 75 | .nox/ 76 | .coverage 77 | .coverage.* 78 | .cache 79 | nosetests.xml 80 | coverage.xml 81 | *.cover 82 | .hypothesis/ 83 | .pytest_cache/ 84 | 85 | # Translations 86 | *.mo 87 | *.pot 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # pyenv 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | .dmypy.json 114 | dmypy.json 115 | 116 | ### Windows ### 117 | # Windows thumbnail cache files 118 | Thumbs.db 119 | Thumbs.db:encryptable 120 | ehthumbs.db 121 | ehthumbs_vista.db 122 | 123 | ### Mac ### 124 | .DS_Store 125 | 126 | # Dump file 127 | *.stackdump 128 | 129 | # Folder config file 130 | [Dd]esktop.ini 131 | 132 | # Recycle Bin used on file shares 133 | $RECYCLE.BIN/ 134 | 135 | # Windows Installer files 136 | *.cab 137 | *.msi 138 | *.msix 139 | *.msm 140 | *.msp 141 | 142 | # Windows shortcuts 143 | *.lnk 144 | 145 | # End of https://www.gitignore.io/api/code,linux,python,pycharm,windows 146 | 147 | # Project Specific 148 | typings 149 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ultima-scraper-collection" 3 | version = "2.3.18" 4 | description = "" 5 | authors = ["UltimaHoarder <1285176+UltimaHoarder@users.noreply.github.com>"] 6 | packages = [{ include = "ultima_scraper_collection" }] 7 | include = ["ultima_scraper_collection/py.typed"] 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.10,<4" 11 | sqlalchemy = "^2.0.1" 12 | psycopg2 = "^2.9.5" 13 | alembic = "^1.9.2" 14 | ffmpeg-python = "^0.2.0" 15 | pydantic = "^2.0" 16 | netifaces = "^0.11.0" 17 | sshtunnel = "^0.4.0" 18 | inflection = "^0.5.1" 19 | alive-progress = "^3.1.5" 20 | aio-pika = "^9.4.1" 21 | ujson = "^5.10.0" 22 | 23 | ultima-scraper-api = "^2.0" 24 | ultima-scraper-renamer = "^1.0" 25 | ultima-scraper-db = "^0.3" 26 | ultima-scraper-detector = "^0.1" 27 | appdirs = "^1.4.4" 28 | 29 | [tool.poetry.group.dev.dependencies] 30 | python-semantic-release = "^7.33.2" 31 | black = { version = "^23.3.0", allow-prereleases = true } 32 | 33 | [tool.semantic_release] 34 | version_toml = "pyproject.toml:tool.poetry.version" 35 | 36 | 
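# version_toml points python-semantic-release at the Poetry version field in
# this file, so a release bump rewrites tool.poetry.version in place.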
[build-system] 37 | requires = ["poetry-core"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import ultima_scraper_api 4 | 5 | from ultima_scraper_collection.config import UltimaScraperCollectionConfig 6 | from ultima_scraper_collection.managers.datascraper_manager.datascraper_manager import ( 7 | DataScraperManager, 8 | ) 9 | from ultima_scraper_collection.managers.server_manager import ServerManager 10 | 11 | 12 | async def main(): 13 | config = UltimaScraperCollectionConfig() 14 | server_manager = ServerManager(config.settings.databases[0].connection_info.dict()) 15 | api = ultima_scraper_api.select_api("OnlyFans") 16 | _datascraper = DataScraperManager(server_manager, config).select_datascraper(api) 17 | pass 18 | 19 | 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/tests/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/__init__.py: -------------------------------------------------------------------------------- 1 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 2 | FanslyDataScraper, 3 | ) 4 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 5 | OnlyFansDataScraper, 6 | ) 7 | 8 | datascraper_types = OnlyFansDataScraper | FanslyDataScraper 9 | -------------------------------------------------------------------------------- /ultima_scraper_collection/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import ujson 4 | from appdirs import user_config_dir 5 | from pydantic import BaseModel, StrictBool, StrictInt, StrictStr 6 | from ultima_scraper_api.config import FanslyAPIConfig 7 | from ultima_scraper_api.config import GlobalAPI as USAGlobalAPI 8 | from ultima_scraper_api.config import OnlyFansAPIConfig, Settings 9 | from ultima_scraper_api.config import Sites as USASites 10 | from ultima_scraper_api.config import UltimaScraperAPIConfig 11 | 12 | 13 | class Jobs(BaseModel): 14 | class Scrape(BaseModel): 15 | subscriptions: bool = True 16 | messages: bool = True 17 | paid_contents: bool = True 18 | 19 | class Metadata: 20 | content: bool = True 21 | comments: bool = True 22 | 23 | scrape: Scrape = Scrape() 24 | metadata: Scrape = Scrape() 25 | 26 | 27 | class Directory(BaseModel): 28 | path: Path | None = None 29 | minimum_space: int = -1 30 | store: bool = True 31 | overflow: bool = True 32 | 33 | 34 | class GlobalXPathSetup(BaseModel): 35 | directories: list[Directory] = [Directory(path=Path("__user_data__").absolute())] 36 | directory_format: Path = Path() 37 | 38 | 39 | class DownloadPathSetup(GlobalXPathSetup): 40 | filename_format: Path = Path() 41 | text_length: int = 255 42 | date_format: str = "%Y-%m-%d" 43 | overwrite_files: bool = True 44 | 45 | 46 | class Trash(BaseModel): 47 | cleanup: bool = True 48 | 49 | 50 | class ToolSettings(BaseModel): 51 | active: bool = True 52 | 53 | 54 | class Renamer(ToolSettings): 55 | pass 56 | 57 | 58 | class Reformatter(ToolSettings): 59 | 
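    # Currently just a placeholder: Renamer, Reformatter and Downloader only
    # inherit the `active` flag from ToolSettings.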
pass 60 | 61 | 62 | class Downloader(ToolSettings): 63 | pass 64 | 65 | 66 | class SSHConnection(BaseModel): 67 | username: str | None = None 68 | private_key_filepath: Path | None = None 69 | private_key_password: str | None = None 70 | host: str | None = None 71 | port: int = 22 72 | 73 | 74 | class DatabaseInfo(BaseModel): 75 | name: str = "ultima_archive" 76 | host: str = "localhost" 77 | port: int = 5432 78 | username: str | None = None 79 | password: str | None = None 80 | ssh: SSHConnection = SSHConnection() 81 | 82 | 83 | class Database(BaseModel): 84 | connection_info: DatabaseInfo = DatabaseInfo() 85 | main: bool = True 86 | active: bool = True 87 | 88 | 89 | class Tools(BaseModel): 90 | renamer: Renamer = Renamer() 91 | reformatter: Reformatter = Reformatter() 92 | downloader: Downloader = Downloader() 93 | 94 | 95 | auto_types = list[int | str] | StrictInt | StrictStr | StrictBool | None 96 | 97 | 98 | class GlobalAPI(USAGlobalAPI): 99 | auto_profile_choice: auto_types = None 100 | auto_performer_choice: auto_types = None 101 | auto_content_choice: auto_types = None 102 | auto_media_choice: auto_types = None 103 | jobs: Jobs = Jobs() 104 | metadata_setup: GlobalXPathSetup = GlobalXPathSetup() 105 | metadata_setup.directory_format: Path = Path( 106 | "{site_name}/{first_letter}/{model_username}/Metadata" 107 | ) 108 | download_setup: DownloadPathSetup = DownloadPathSetup() 109 | download_setup.directory_format: Path = Path( 110 | "{site_name}/{first_letter}/{model_username}/{api_type}/{value}/{media_type}" 111 | ) 112 | download_setup.filename_format: Path = Path("{filename}.{ext}") 113 | blacklists: list[str] = [] 114 | 115 | 116 | class Sites(USASites): 117 | class OnlyFansAPIConfig(OnlyFansAPIConfig, GlobalAPI): 118 | pass 119 | 120 | class FanslyAPIConfig(FanslyAPIConfig, GlobalAPI): 121 | pass 122 | 123 | onlyfans: OnlyFansAPIConfig = OnlyFansAPIConfig(auto_content_choice=True) 124 | fansly: FanslyAPIConfig = FanslyAPIConfig() 125 | 126 | 127 | site_config_types = Sites.OnlyFansAPIConfig | Sites.FanslyAPIConfig 128 | 129 | 130 | class UltimaScraperCollectionConfig(UltimaScraperAPIConfig): 131 | class Settings(Settings): 132 | auto_site_choice: str = "" 133 | databases: list[Database] = [Database()] 134 | tools: Tools = Tools() 135 | trash: Trash = Trash() 136 | infinite_loop: bool = False 137 | exit_on_completion: bool = True 138 | 139 | def get_main_database(self): 140 | return [x for x in self.databases if x.main][0] 141 | 142 | settings: Settings = Settings() 143 | site_apis: Sites = Sites() 144 | 145 | def load_default_config(self): 146 | config_dir = user_config_dir("ultima_scraper_verse") # type: ignore 147 | config_path = Path(config_dir) / "config.json" # type: ignore 148 | 149 | config_json = ujson.loads(config_path.read_text()) 150 | return UltimaScraperCollectionConfig(**config_json) 151 | 152 | def get_site_config(self, site_name: str): 153 | return getattr(self.site_apis, site_name.lower()) 154 | -------------------------------------------------------------------------------- /ultima_scraper_collection/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/helpers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/helpers/main_helper.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from shutil import disk_usage 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from ultima_scraper_api import user_types 7 | 8 | from ultima_scraper_collection.config import Directory, UltimaScraperCollectionConfig 9 | import ujson 10 | 11 | if TYPE_CHECKING: 12 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 13 | UserModel as DBUserModel, 14 | ) 15 | 16 | 17 | def check_space( 18 | custom_directory: list[Directory], 19 | ): 20 | root = "" 21 | while not root: 22 | paths: list[dict[str, Any]] = [] 23 | for directory in custom_directory: 24 | # ISSUE 25 | # Could cause problems w/ relative/symbolic links that point to another hard drive 26 | # Haven't tested if it calculates hard A or relative/symbolic B's total space. 27 | # size is in GB 28 | assert directory.path 29 | obj_Disk = disk_usage(str(directory.path.parent)) 30 | free = obj_Disk.free / (1024.0**3) 31 | x = {} 32 | x["path"] = directory.path 33 | x["free"] = free 34 | x["min_space"] = directory.minimum_space 35 | paths.append(x) 36 | for item in paths: 37 | download_path = item["path"] 38 | free = item["free"] 39 | if free > item["min_space"]: 40 | root = download_path 41 | break 42 | return root 43 | 44 | 45 | from ultima_scraper_api.apis.onlyfans.classes.user_model import ( 46 | create_user as OFUserModel, 47 | ) 48 | 49 | 50 | async def is_valuable(user: "DBUserModel | user_types"): 51 | """ 52 | Checks if the user is valuable based on their subscription status or if they have supplied content to a buyer. 53 | 54 | Args: 55 | user (DBUserModel | user_types): The user to check. 56 | 57 | Returns: 58 | bool: True if the user is valuable, False otherwise. 
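
    Example (hypothetical caller; `scrape_performer` is illustrative only):
        >>> if await is_valuable(performer):
        ...     await scrape_performer(performer)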
59 | """ 60 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 61 | UserModel as DBUserModel, 62 | ) 63 | 64 | if isinstance(user, DBUserModel): 65 | if await user.find_buyers(active=True): 66 | return True 67 | else: 68 | return False 69 | else: 70 | if user.is_performer(): 71 | if isinstance(user, OFUserModel): 72 | if ( 73 | user.subscribed_is_expired_now == False 74 | or await user.get_paid_contents() 75 | ): 76 | return True 77 | else: 78 | return False 79 | else: 80 | # We need to add paid_content checker 81 | if user.following: 82 | return True 83 | else: 84 | return False 85 | else: 86 | return False 87 | 88 | 89 | async def is_notif_valuable(api_user: user_types): 90 | if await is_valuable(api_user): 91 | if await api_user.subscription_price() == 0: 92 | if isinstance(api_user, OFUserModel) and await api_user.get_paid_contents(): 93 | return True 94 | return False 95 | else: 96 | return True 97 | return False 98 | 99 | 100 | async def walk(directory: Path): 101 | all_files: list[Path] = [] 102 | for root, _subdirs, files in os.walk(directory): 103 | x = [Path(root, x) for x in files] 104 | all_files.extend(x) 105 | return all_files 106 | 107 | 108 | def find_unused_filename(filepath: Path): 109 | base_name = filepath.stem # Get the filename without extension 110 | extension = filepath.suffix # Get the file extension 111 | counter = 2 112 | 113 | while filepath.exists(): 114 | new_name = f"{base_name} ({counter}){extension}" 115 | filepath = filepath.with_name(new_name) 116 | counter += 1 117 | 118 | return filepath 119 | 120 | 121 | from PIL import Image 122 | 123 | 124 | def is_image_valid(file_path: Path): 125 | try: 126 | with Image.open(file_path) as img: 127 | # Attempt to open the image file 128 | img.load() # This will load the image data 129 | return True # If successful, the image is not corrupted 130 | except Exception as e: 131 | # An exception occurred, so the image might be corrupted 132 | print(f"Error: {e}") 133 | return False 134 | 135 | 136 | def load_config(config_path: Path): 137 | config_json = ujson.loads(config_path.read_text()) 138 | return UltimaScraperCollectionConfig(**config_json) 139 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/aio_pika_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import aio_pika 4 | import ujson 5 | 6 | 7 | def create_notification( 8 | category: str, 9 | site_name: str, 10 | item: Any, 11 | ): 12 | json_message = { 13 | "site_name": site_name, 14 | "category": category, 15 | "performer_id": item.id, 16 | "username": item.username, 17 | } 18 | message = {"id": item.id, "data": json_message} 19 | return message 20 | 21 | 22 | def create_message( 23 | site_name: str, item: Any, mandatory_jobs: dict[str, dict[str, list[str]]] 24 | ): 25 | json_message = { 26 | "site_name": site_name, 27 | "performer_id": item.id, 28 | "username": item.username, 29 | "mandatory_jobs": mandatory_jobs, 30 | } 31 | message = {"id": item.id, "data": json_message} 32 | return message 33 | 34 | 35 | class 
AioPikaWrapper: 36 | def __init__(self, host: str = "localhost"): 37 | self.amqp_url = f"amqp://{host}/" 38 | self.connection = None 39 | self.channel = None 40 | 41 | def get_connection(self): 42 | assert self.connection is not None 43 | return self.connection 44 | 45 | def get_channel(self): 46 | assert self.channel is not None 47 | return self.channel 48 | 49 | async def connect(self, prefetch_count: int = 0): 50 | if self.connection is not None: 51 | return 52 | self.connection = await aio_pika.connect_robust(self.amqp_url) 53 | self.channel = await self.connection.channel() 54 | await self.channel.set_qos(prefetch_count=prefetch_count) 55 | 56 | async def declare_queue(self, queue_name: str, durable: bool = True): 57 | if self.channel is None: 58 | await self.connect() 59 | assert self.channel is not None 60 | return await self.channel.declare_queue( 61 | queue_name, 62 | durable=durable, 63 | arguments={"x-message-deduplication": True, "x-max-priority": 10}, 64 | ) 65 | 66 | async def publish_message( 67 | self, 68 | queue_name: str, 69 | message: dict[str, Any], 70 | durable: bool = True, 71 | priority: int = 0, 72 | ): 73 | if self.channel is None: 74 | await self.connect() 75 | await self.declare_queue(queue_name, durable) 76 | assert self.channel is not None 77 | 78 | message_id = message.get( 79 | "id", "default_id" 80 | ) # Ensure you have a unique ID for deduplication 81 | headers = {"x-deduplication-header": message_id} 82 | try: 83 | await self.channel.default_exchange.publish( 84 | aio_pika.Message( 85 | body=ujson.dumps(message).encode(), 86 | delivery_mode=( 87 | aio_pika.DeliveryMode.PERSISTENT 88 | if durable 89 | else aio_pika.DeliveryMode.NOT_PERSISTENT 90 | ), 91 | headers=headers, 92 | priority=priority, 93 | ), 94 | routing_key=queue_name, 95 | ) 96 | print(f"Message published to {queue_name}") 97 | return True 98 | except aio_pika.exceptions.DeliveryError as e: 99 | print(f"Error publishing message: {e}") 100 | return False 101 | 102 | async def publish_notification(self, message: dict[str, Any]): 103 | await self.publish_message("telegram_notifications", message) 104 | await self.publish_message("discord_notifications", message) 105 | 106 | async def close(self): 107 | if self.connection: 108 | await self.connection.close() 109 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/content_manager.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | from typing import TYPE_CHECKING, Any 3 | 4 | if TYPE_CHECKING: 5 | from ultima_scraper_api import auth_types 6 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 7 | MediaMetadata, 8 | ) 9 | 10 | 11 | class DefaultCategorizedContent: 12 | def __init__(self) -> None: 13 | self.MassMessages: dict[int, dict[str, Any]] = {} 14 | self.Stories: dict[int, dict[str, Any]] = {} 15 | self.Posts: dict[int, dict[str, Any]] = {} 16 | self.Chats: dict[int, dict[str, Any]] = {} 17 | self.Messages: dict[int, dict[str, Any]] = {} 18 | self.Highlights: dict[int, dict[str, Any]] = {} 19 | 20 | def __iter__(self): 21 | for attr, value in self.__dict__.items(): 22 | yield attr, value 23 | 24 | def find_content(self, content_id: int, content_type: str): 25 | return getattr(self, content_type)[content_id] 26 | 27 | 28 | class ContentManager: 29 | def __init__(self, authed: "auth_types") -> None: 30 | self.authed = authed 31 | self.auth_session = authed.auth_session 32 | 
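        # Per-account state: scraped items are bucketed by category in
        # `categorized`, and media metadata is tracked by one MediaManager.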
self.categorized = DefaultCategorizedContent() 33 | self.media_manager: MediaManager = MediaManager() 34 | 35 | def get_contents(self, content_type: str): 36 | return getattr(self.categorized, content_type) 37 | 38 | def set_content(self, content_type: str, scraped: list[Any]): 39 | for content in scraped: 40 | content_item = getattr(self.categorized, content_type) 41 | content_item[content.content_id] = content 42 | 43 | def find_content(self, content_id: int, content_type: str): 44 | found_content = None 45 | try: 46 | found_content = getattr(self.categorized, content_type)[content_id] 47 | except KeyError: 48 | pass 49 | return found_content 50 | 51 | def find_media(self, category: str, media_id: int): 52 | content_items = getattr(self.categorized, category) 53 | for content in content_items.values(): 54 | for media in content.medias: 55 | if media.id == media_id: 56 | return media 57 | 58 | def get_all_media_ids(self): 59 | return list(chain(*[x for x in self.categorized.__dict__.values()])) 60 | 61 | 62 | class MediaManager: 63 | def __init__(self) -> None: 64 | self.medias: dict[int, "MediaMetadata"] = {} 65 | self.invalid_medias: list["MediaMetadata"] = [] 66 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # sys.path path, will be prepended to sys.path if present. 
11 | # defaults to the current working directory. 12 | prepend_sys_path = . 13 | 14 | # timezone to use when rendering the date 15 | # within the migration file as well as the filename. 16 | # string value is passed to dateutil.tz.gettz() 17 | # leave blank for localtime 18 | # timezone = 19 | 20 | # max length of characters to apply to the 21 | # "slug" field 22 | # truncate_slug_length = 40 23 | 24 | # set to 'true' to run the environment during 25 | # the 'revision' command, regardless of autogenerate 26 | # revision_environment = false 27 | 28 | # set to 'true' to allow .pyc and .pyo files without 29 | # a source .py file to be detected as revisions in the 30 | # versions/ directory 31 | # sourceless = false 32 | 33 | # version location specification; this defaults 34 | # to alembic/versions. When using multiple version 35 | # directories, initial revisions must be specified with --version-path 36 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 37 | 38 | # the output encoding used when revision files 39 | # are written from script.py.mako 40 | # output_encoding = utf-8 41 | 42 | sqlalchemy.url = driver://user:pass@localhost/dbname 43 | 44 | 45 | [post_write_hooks] 46 | # post_write_hooks defines scripts or Python functions that are run 47 | # on newly generated revision scripts. See the documentation for further 48 | # detail and examples 49 | 50 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 51 | # hooks = black 52 | # black.type = console_scripts 53 | # black.entrypoint = black 54 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 55 | 56 | # Logging configuration 57 | [loggers] 58 | keys = root,sqlalchemy,alembic 59 | 60 | [handlers] 61 | keys = console 62 | 63 | [formatters] 64 | keys = generic 65 | 66 | [logger_root] 67 | level = WARN 68 | handlers = console 69 | qualname = 70 | 71 | [logger_sqlalchemy] 72 | level = WARN 73 | handlers = 74 | qualname = sqlalchemy.engine 75 | 76 | [logger_alembic] 77 | level = WARN 78 | handlers = 79 | qualname = alembic 80 | 81 | [handler_console] 82 | class = StreamHandler 83 | args = (sys.stderr,) 84 | level = NOTSET 85 | formatter = generic 86 | 87 | [formatter_generic] 88 | format = %(levelname)-5.5s [%(name)s] %(message)s 89 | datefmt = %H:%M:%S 90 | 91 | [custom] 92 | database_name = None 93 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 
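# fileConfig() applies the [loggers]/[handlers]/[formatters] sections of the
# alembic.ini that this migration run was invoked with.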
13 | fileConfig(config.config_file_name) 14 | 15 | # add your model's MetaData object here 16 | # for 'autogenerate' support 17 | # from myapp import mymodel 18 | # target_metadata = mymodel.Base.metadata 19 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models import ( 20 | user_database, 21 | ) 22 | 23 | target_metadata = user_database.Base.metadata 24 | pass 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure(connection=connection, target_metadata=target_metadata) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/0d4d92c0498e_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 0d4d92c0498e 6 | Revises: b791cf213df9 7 | Create Date: 2022-01-14 20:15:27.019051 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | 13 | # revision identifiers, used by Alembic. 
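# `down_revision` links this script to its parent revision, which is how
# Alembic orders the migration chain when upgrading or downgrading.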
14 | revision = "0d4d92c0498e" 15 | down_revision = "b791cf213df9" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.add_column("products", sa.Column("title", sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! ### 28 | op.drop_column("products", "title") 29 | # ### end Alembic commands ### 30 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/1454e4d1c6b8_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 1454e4d1c6b8 6 | Revises: 37c4f2719d65 7 | Create Date: 2023-02-12 23:17:29.239758 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | from sqlalchemy import Column, Integer 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "1454e4d1c6b8" 16 | down_revision = "37c4f2719d65" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | conn = op.get_bind() 24 | 25 | meta = sa.MetaData() 26 | meta.reflect(bind=conn, only=("medias",)) 27 | old_table = meta.tables["medias"] 28 | if "media_id_2" not in old_table.columns: 29 | with op.batch_alter_table("medias", recreate="always") as batch_op: 30 | batch_op.add_column(Column("media_id_2", Integer), insert_after="id") 31 | 32 | pass 33 | # ### end Alembic commands ### 34 | 35 | 36 | def downgrade(): 37 | # ### commands auto generated by Alembic - please adjust! ### 38 | pass 39 | # ### end Alembic commands ### 40 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/37c4f2719d65_content.py: -------------------------------------------------------------------------------- 1 | """content 2 | 3 | Revision ID: 37c4f2719d65 4 | Revises: 0d4d92c0498e 5 | Create Date: 2022-03-10 16:00:20.390009 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "37c4f2719d65" 14 | down_revision = "0d4d92c0498e" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table( 22 | "profiles", 23 | sa.Column("id", sa.Integer(), nullable=False), 24 | sa.Column("user_id", sa.Integer(), nullable=False), 25 | sa.Column("username", sa.String(), nullable=False), 26 | sa.PrimaryKeyConstraint("id"), 27 | sa.UniqueConstraint("username"), 28 | ) 29 | # ### end Alembic commands ### 30 | 31 | 32 | def downgrade(): 33 | # ### commands auto generated by Alembic - please adjust! 
### 34 | op.drop_table("profiles") 35 | # ### end Alembic commands ### 36 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/5493253cc03c_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 5493253cc03c 5 | Revises: 6 | Create Date: 2021-06-21 14:22:30.585216 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "5493253cc03c" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("api_type", sa.String(), nullable=True), 32 | sa.Column("media_type", sa.String(), nullable=True), 33 | sa.Column("preview", sa.Integer(), nullable=True), 34 | sa.Column("linked", sa.String(), nullable=True), 35 | sa.Column("downloaded", sa.Integer(), nullable=True), 36 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 37 | sa.PrimaryKeyConstraint("id"), 38 | sa.UniqueConstraint("media_id"), 39 | ) 40 | op.create_table( 41 | "messages", 42 | sa.Column("id", sa.Integer(), nullable=False), 43 | sa.Column("post_id", sa.Integer(), nullable=False), 44 | sa.Column("text", sa.String(), nullable=True), 45 | sa.Column("price", sa.Integer(), nullable=True), 46 | sa.Column("paid", sa.Integer(), nullable=True), 47 | sa.Column("archived", sa.Boolean(), nullable=True), 48 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 49 | sa.Column("user_id", sa.Integer(), nullable=True), 50 | sa.PrimaryKeyConstraint("id"), 51 | sa.UniqueConstraint("post_id"), 52 | ) 53 | op.create_table( 54 | "posts", 55 | sa.Column("id", sa.Integer(), nullable=False), 56 | sa.Column("post_id", sa.Integer(), nullable=False), 57 | sa.Column("text", sa.String(), nullable=True), 58 | sa.Column("price", sa.Integer(), nullable=True), 59 | sa.Column("paid", sa.Integer(), nullable=True), 60 | sa.Column("archived", sa.Boolean(), nullable=True), 61 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 62 | sa.PrimaryKeyConstraint("id"), 63 | sa.UniqueConstraint("post_id"), 64 | ) 65 | op.create_table( 66 | "stories", 67 | sa.Column("id", sa.Integer(), nullable=False), 68 | sa.Column("post_id", sa.Integer(), nullable=False), 69 | sa.Column("text", sa.String(), nullable=True), 70 | sa.Column("price", sa.Integer(), nullable=True), 71 | sa.Column("paid", sa.Integer(), nullable=True), 72 | sa.Column("archived", sa.Boolean(), nullable=True), 73 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 74 | sa.PrimaryKeyConstraint("id"), 75 | sa.UniqueConstraint("post_id"), 76 | ) 77 | # ### end Alembic commands ### 78 | 79 | 80 | def downgrade(): 81 | # ### commands auto generated by Alembic - please adjust! 
### 82 | op.drop_table("stories") 83 | op.drop_table("posts") 84 | op.drop_table("messages") 85 | op.drop_table("medias") 86 | # ### end Alembic commands ### 87 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/b791cf213df9_content.py: -------------------------------------------------------------------------------- 1 | """content 2 | 3 | Revision ID: b791cf213df9 4 | Revises: 5493253cc03c 5 | Create Date: 2021-11-16 16:33:04.723478 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "b791cf213df9" 14 | down_revision = "5493253cc03c" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table( 22 | "others", 23 | sa.Column("id", sa.Integer(), nullable=False), 24 | sa.Column("post_id", sa.Integer(), nullable=False), 25 | sa.Column("text", sa.String(), nullable=True), 26 | sa.Column("price", sa.Integer(), nullable=True), 27 | sa.Column("paid", sa.Integer(), nullable=True), 28 | sa.Column("archived", sa.Boolean(), nullable=True), 29 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 30 | sa.PrimaryKeyConstraint("id"), 31 | sa.UniqueConstraint("post_id"), 32 | ) 33 | op.create_table( 34 | "products", 35 | sa.Column("id", sa.Integer(), nullable=False), 36 | sa.Column("post_id", sa.Integer(), nullable=False), 37 | sa.Column("text", sa.String(), nullable=True), 38 | sa.Column("price", sa.Integer(), nullable=True), 39 | sa.Column("paid", sa.Integer(), nullable=True), 40 | sa.Column("archived", sa.Boolean(), nullable=True), 41 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 42 | sa.PrimaryKeyConstraint("id"), 43 | sa.UniqueConstraint("post_id"), 44 | ) 45 | # ### end Alembic commands ### 46 | 47 | 48 | def downgrade(): 49 | # ### commands auto generated by Alembic - please adjust! ### 50 | op.drop_table("products") 51 | op.drop_table("others") 52 | # ### end Alembic commands ### 53 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/d2f2002f3c36_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: d2f2002f3c36 6 | Revises: 1454e4d1c6b8 7 | Create Date: 2023-02-13 22:40:57.202281 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | from sqlalchemy.orm import Session 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "d2f2002f3c36" 16 | down_revision = "1454e4d1c6b8" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! 
### 23 | invalid_conn = op.get_bind() 24 | database_url = str(invalid_conn.engine.url) 25 | conn = sa.create_engine(database_url) 26 | session = Session(bind=conn) 27 | res = session.execute(sa.text("SELECT id,media_id FROM medias;")) 28 | results = res.fetchall() 29 | meta = sa.MetaData() 30 | meta.reflect(bind=conn, only=("medias",)) 31 | old_table = meta.tables["medias"] 32 | 33 | session = Session(bind=conn) 34 | for items in results: 35 | formatted = dict(items._mapping) 36 | ( 37 | session.query(old_table) 38 | .filter(old_table.c.id == formatted["id"]) 39 | .update({"media_id_2": formatted["media_id"]}) 40 | ) 41 | session.commit() 42 | with op.batch_alter_table("medias") as batch_op: 43 | batch_op.drop_column("media_id") 44 | batch_op.alter_column("media_id_2", new_column_name="media_id") 45 | # ### end Alembic commands ### 46 | 47 | 48 | def downgrade(): 49 | # ### commands auto generated by Alembic - please adjust! ### 50 | pass 51 | # ### end Alembic commands ### 52 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/base_user_database.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/base_user_database.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 
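# Each legacy database (messages, posts, stories) keeps its own copy of this
# ini next to its alembic/ tree.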
2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.messages.migration import ( 7 | messages, 8 | ) 9 | 10 | # this is the Alembic Config object, which provides 11 | # access to the values within the .ini file in use. 12 | config = context.config 13 | 14 | # Interpret the config file for Python logging. 15 | # This line sets up loggers basically. 
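# disable_existing_loggers=False keeps loggers configured before this call
# (e.g. the application's own) from being silenced.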
16 | fileConfig(config.config_file_name, disable_existing_loggers=False) 17 | 18 | # add your model's MetaData object here 19 | # for 'autogenerate' support 20 | # from myapp import mymodel 21 | # target_metadata = mymodel.Base.metadata 22 | target_metadata = messages.Base.metadata 23 | 24 | # other values from the config, defined by the needs of env.py, 25 | # can be acquired: 26 | # my_important_option = config.get_main_option("my_important_option") 27 | # ... etc. 28 | 29 | 30 | def run_migrations_offline(): 31 | """Run migrations in 'offline' mode. 32 | 33 | This configures the context with just a URL 34 | and not an Engine, though an Engine is acceptable 35 | here as well. By skipping the Engine creation 36 | we don't even need a DBAPI to be available. 37 | 38 | Calls to context.execute() here emit the given string to the 39 | script output. 40 | 41 | """ 42 | url = config.get_main_option("sqlalchemy.url") 43 | context.configure( 44 | url=url, 45 | target_metadata=target_metadata, 46 | literal_binds=True, 47 | dialect_opts={"paramstyle": "named"}, 48 | ) 49 | 50 | with context.begin_transaction(): 51 | context.run_migrations() 52 | 53 | 54 | def run_migrations_online(): 55 | """Run migrations in 'online' mode. 56 | 57 | In this scenario we need to create an Engine 58 | and associate a connection with the context. 59 | 60 | """ 61 | connectable = engine_from_config( 62 | config.get_section(config.config_ini_section), 63 | prefix="sqlalchemy.", 64 | poolclass=pool.NullPool, 65 | ) 66 | 67 | with connectable.connect() as connection: 68 | context.configure( 69 | connection=connection, 70 | target_metadata=target_metadata, 71 | render_as_batch=True, 72 | compare_type=True, 73 | ) 74 | 75 | with context.begin_transaction(): 76 | context.run_migrations() 77 | 78 | 79 | if context.is_offline_mode(): 80 | run_migrations_offline() 81 | else: 82 | run_migrations_online() 83 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/2c36fcc0b921_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 2c36fcc0b921 6 | Revises: 7 | Create Date: 2021-01-08 20:25:52.456387 8 | 9 | """ 10 | from alembic import op 11 | import sqlalchemy as sa 12 | 13 | 14 | # revision identifiers, used by Alembic. 
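# down_revision is None because this is the root revision of the legacy
# messages database's migration history.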
15 | revision = "2c36fcc0b921" 16 | down_revision = None 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | op.create_table( 24 | "medias", 25 | sa.Column("id", sa.Integer(), nullable=False), 26 | sa.Column("media_id", sa.Integer(), nullable=True), 27 | sa.Column("post_id", sa.Integer(), nullable=False), 28 | sa.Column("link", sa.String(), nullable=True), 29 | sa.Column("directory", sa.String(), nullable=True), 30 | sa.Column("filename", sa.String(), nullable=True), 31 | sa.Column("size", sa.Integer(), nullable=True), 32 | sa.Column("media_type", sa.String(), nullable=True), 33 | sa.Column("downloaded", sa.Integer(), nullable=True), 34 | sa.Column("created_at", sa.DATETIME(), nullable=True), 35 | sa.PrimaryKeyConstraint("id"), 36 | sa.UniqueConstraint("media_id"), 37 | ) 38 | op.create_table( 39 | "messages", 40 | sa.Column("id", sa.Integer(), nullable=False), 41 | sa.Column("post_id", sa.Integer(), nullable=False), 42 | sa.Column("text", sa.String(), nullable=True), 43 | sa.Column("price", sa.Integer(), nullable=True), 44 | sa.Column("paid", sa.Integer(), nullable=True), 45 | sa.Column("created_at", sa.DATETIME(), nullable=True), 46 | sa.PrimaryKeyConstraint("id"), 47 | sa.UniqueConstraint("post_id"), 48 | ) 49 | # ### end Alembic commands ### 50 | 51 | 52 | def downgrade(): 53 | # ### commands auto generated by Alembic - please adjust! ### 54 | op.drop_table("messages") 55 | op.drop_table("medias") 56 | # ### end Alembic commands ### 57 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/7c1c6e101059_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 7c1c6e101059 5 | Revises: aeb9fe314556 6 | Create Date: 2021-05-31 02:56:29.998095 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "7c1c6e101059" 15 | down_revision = "aeb9fe314556" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("messages", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! 
### 44 | with op.batch_alter_table("messages", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/aeb9fe314556_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: aeb9fe314556 5 | Revises: d0118d8ec0b4 6 | Create Date: 2021-02-14 19:56:59.175268 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "aeb9fe314556" 15 | down_revision = "d0118d8ec0b4" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/bf20242a238f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: bf20242a238f 5 | Revises: 7c1c6e101059 6 | Create Date: 2021-06-20 12:42:35.578665 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "bf20242a238f" 15 | down_revision = "7c1c6e101059" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/d0118d8ec0b4_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: d0118d8ec0b4 5 | Revises: 2c36fcc0b921 6 | Create Date: 2021-02-04 02:59:06.516503 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
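# Second revision in the legacy messages chain (revises 2c36fcc0b921);
# upgrade() below adds the medias.preview column.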
14 | revision = "d0118d8ec0b4" 15 | down_revision = "2c36fcc0b921" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/messages.py: -------------------------------------------------------------------------------- 1 | ### messages.py ### 2 | 3 | # type: ignore 4 | from sqlalchemy.orm import declarative_base 5 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 6 | ApiModel, 7 | ) 8 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 9 | TemplateMediaModel, 10 | ) 11 | 12 | Base = declarative_base() 13 | 14 | 15 | class api_table(ApiModel, Base): 16 | ApiModel.__tablename__ = "messages" 17 | 18 | 19 | class TemplateMediaModel(TemplateMediaModel, Base): 20 | pass 21 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/test_messages.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/test_messages.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. 
When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.posts.migration import ( 8 | posts, 9 | ) 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 17 | fileConfig(config.config_file_name, disable_existing_loggers=False) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | target_metadata = posts.Base.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 
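(Note: the placeholder sqlalchemy.url in alembic.ini is not used directly here; SqliteDatabase.run_migrations() overrides it on the Config at runtime, so the Engine below is created against the actual SQLite file being migrated.)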
57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure( 70 | connection=connection, 71 | target_metadata=target_metadata, 72 | render_as_batch=True, 73 | compare_type=True, 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/194e05269f09_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 194e05269f09 5 | Revises: 6 | Create Date: 2021-01-08 20:25:16.796179 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "194e05269f09" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("media_type", sa.String(), nullable=True), 32 | sa.Column("downloaded", sa.Integer(), nullable=True), 33 | sa.Column("created_at", sa.DATETIME(), nullable=True), 34 | sa.PrimaryKeyConstraint("id"), 35 | sa.UniqueConstraint("media_id"), 36 | ) 37 | op.create_table( 38 | "posts", 39 | sa.Column("id", sa.Integer(), nullable=False), 40 | sa.Column("post_id", sa.Integer(), nullable=False), 41 | sa.Column("text", sa.String(), nullable=True), 42 | sa.Column("price", sa.Integer(), nullable=True), 43 | sa.Column("paid", sa.Integer(), nullable=True), 44 | sa.Column("created_at", sa.DATETIME(), nullable=True), 45 | sa.PrimaryKeyConstraint("id"), 46 | sa.UniqueConstraint("post_id"), 47 | ) 48 | # ### end Alembic commands ### 49 | 50 | 51 | def downgrade(): 52 | # ### commands auto generated by Alembic - please adjust! ### 53 | op.drop_table("posts") 54 | op.drop_table("medias") 55 | # ### end Alembic commands ### 56 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/5b4bea08c27f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 5b4bea08c27f 5 | Revises: 194e05269f09 6 | Create Date: 2021-02-04 02:59:05.010106 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "5b4bea08c27f" 15 | down_revision = "194e05269f09" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/6b1b10eb67de_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 6b1b10eb67de 5 | Revises: 5b4bea08c27f 6 | Create Date: 2021-02-14 19:56:56.267261 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "6b1b10eb67de" 15 | down_revision = "5b4bea08c27f" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/990fc1108317_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 990fc1108317 5 | Revises: a918b6b05d2f 6 | Create Date: 2021-06-20 12:42:34.173918 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "990fc1108317" 15 | down_revision = "a918b6b05d2f" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/a918b6b05d2f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: a918b6b05d2f 5 | Revises: 6b1b10eb67de 6 | Create Date: 2021-05-31 02:56:28.192070 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "a918b6b05d2f" 15 | down_revision = "6b1b10eb67de" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("posts", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! 
### 44 | with op.batch_alter_table("posts", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/posts.py: -------------------------------------------------------------------------------- 1 | ### posts.py ### 2 | 3 | # type: ignore 4 | from sqlalchemy.orm import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | 15 | 16 | class api_table(ApiModel, Base): 17 | ApiModel.__tablename__ = "posts" 18 | 19 | 20 | class TemplateMediaModel(TemplateMediaModel, Base): 21 | pass 22 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/test_posts.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/test_posts.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. 
When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.stories.migration import ( 8 | stories, 9 | ) 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 17 | fileConfig(config.config_file_name, disable_existing_loggers=False) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | target_metadata = stories.Base.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 
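(Note: poolclass=pool.NullPool below disables connection pooling, so each migration run opens a fresh connection to the SQLite file and closes it when done.)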
57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure( 70 | connection=connection, 71 | target_metadata=target_metadata, 72 | render_as_batch=True, 73 | compare_type=True, 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/29f675c35eee_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 29f675c35eee 5 | Revises: 3076beb33c1b 6 | Create Date: 2021-02-04 02:59:01.746229 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "29f675c35eee" 15 | down_revision = "3076beb33c1b" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/2e4f8364f7e2_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 2e4f8364f7e2 5 | Revises: ebc3f4bb0782 6 | Create Date: 2021-05-31 02:56:17.448718 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
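# Note: the upgrade below relies on op.batch_alter_table() because SQLite cannot change an existing column's type in place; in batch mode Alembic copies the table into a new one with the updated schema and swaps it in (env.py enables this via render_as_batch=True).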
14 | revision = "2e4f8364f7e2" 15 | down_revision = "ebc3f4bb0782" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("stories", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! ### 44 | with op.batch_alter_table("stories", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/3076beb33c1b_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 3076beb33c1b 5 | Revises: 6 | Create Date: 2021-01-08 23:09:26.868834 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "3076beb33c1b" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("media_type", sa.String(), nullable=True), 32 | sa.Column("downloaded", sa.Integer(), nullable=True), 33 | sa.Column("created_at", sa.DATETIME(), nullable=True), 34 | sa.PrimaryKeyConstraint("id"), 35 | sa.UniqueConstraint("media_id"), 36 | ) 37 | op.create_table( 38 | "stories", 39 | sa.Column("id", sa.Integer(), nullable=False), 40 | sa.Column("post_id", sa.Integer(), nullable=False), 41 | sa.Column("text", sa.String(), nullable=True), 42 | sa.Column("price", sa.Integer(), nullable=True), 43 | sa.Column("paid", sa.Integer(), nullable=True), 44 | sa.Column("created_at", sa.DATETIME(), nullable=True), 45 | sa.PrimaryKeyConstraint("id"), 46 | sa.UniqueConstraint("post_id"), 47 | ) 48 | # ### end Alembic commands ### 49 | 50 | 51 | def downgrade(): 52 | # ### commands auto generated by Alembic - please adjust! 
### 53 | op.drop_table("stories") 54 | op.drop_table("medias") 55 | # ### end Alembic commands ### 56 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/e0c73f066547_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: e0c73f066547 5 | Revises: 2e4f8364f7e2 6 | Create Date: 2021-06-20 12:42:31.056065 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "e0c73f066547" 15 | down_revision = "2e4f8364f7e2" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/ebc3f4bb0782_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: ebc3f4bb0782 5 | Revises: 29f675c35eee 6 | Create Date: 2021-02-14 19:56:54.040372 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "ebc3f4bb0782" 15 | down_revision = "29f675c35eee" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! 
### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/stories.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | ### stories.py ### 3 | 4 | from sqlalchemy.orm import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | 15 | 16 | class api_table(ApiModel, Base): 17 | ApiModel.__tablename__ = "stories" 18 | 19 | 20 | class TemplateMediaModel(TemplateMediaModel, Base): 21 | pass 22 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/test_stories.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/test_stories.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/models/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/api_model.py: -------------------------------------------------------------------------------- 1 | ### api_model.py ### 2 | 3 | from datetime import datetime 4 | from typing import cast 5 | 6 | import sqlalchemy 7 | from sqlalchemy.orm import declarative_base 8 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 9 | TemplateMediaModel, 10 | ) 11 | 12 | LegacyBase = declarative_base() 13 | 14 | 15 | class ApiModel: 16 | __tablename__ = "" 17 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 18 | post_id = cast( 19 | int, sqlalchemy.Column(sqlalchemy.Integer, unique=True, nullable=False) 20 | ) 21 | text = cast(str, sqlalchemy.Column(sqlalchemy.String)) 22 | price = cast(int, sqlalchemy.Column(sqlalchemy.Integer)) 23 | paid = sqlalchemy.Column(sqlalchemy.Integer) 24 | archived = cast(bool, sqlalchemy.Column(sqlalchemy.Boolean, default=False)) 25 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 26 | medias: list[TemplateMediaModel] = [] 27 | 28 | def legacy(self, table_name: str): 29 | class legacy_api_table(LegacyBase): 30 | __tablename__ = table_name 31 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 32 | text = sqlalchemy.Column(sqlalchemy.String) 33 | price = sqlalchemy.Column(sqlalchemy.Integer) 34 | paid = sqlalchemy.Column(sqlalchemy.Integer) 35 | created_at =
sqlalchemy.Column(sqlalchemy.DATETIME) 36 | 37 | return legacy_api_table 38 | 39 | def convert(self): 40 | item = self.__dict__ 41 | item.pop("_sa_instance_state") 42 | return item 43 | 44 | def find_media(self, media_id: int): 45 | for db_media in self.medias: 46 | if db_media.media_id == media_id: 47 | return db_media 48 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/media_model.py: -------------------------------------------------------------------------------- 1 | ### media_model.py ### 2 | 3 | from datetime import datetime 4 | from typing import Any, cast 5 | 6 | import sqlalchemy 7 | 8 | 9 | class TemplateMediaModel: 10 | __tablename__ = "medias" 11 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 12 | media_id = sqlalchemy.Column(sqlalchemy.Integer) 13 | post_id = sqlalchemy.Column(sqlalchemy.Integer, nullable=False) 14 | link = cast(str, sqlalchemy.Column(sqlalchemy.String)) 15 | directory = cast(str, sqlalchemy.Column(sqlalchemy.String)) 16 | filename = cast(str, sqlalchemy.Column(sqlalchemy.String)) 17 | size = cast(int | None, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 18 | api_type = cast(str, sqlalchemy.Column(sqlalchemy.String)) 19 | media_type = sqlalchemy.Column(sqlalchemy.String) 20 | preview = sqlalchemy.Column(sqlalchemy.Integer, default=0) 21 | linked = sqlalchemy.Column(sqlalchemy.String, default=None) 22 | downloaded = cast(bool, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 23 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 24 | 25 | def legacy(self, Base: Any): 26 | class legacy_media_table(Base): 27 | __tablename__ = "medias" 28 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 29 | post_id = sqlalchemy.Column(sqlalchemy.Integer) 30 | link = sqlalchemy.Column(sqlalchemy.String) 31 | directory = sqlalchemy.Column(sqlalchemy.String) 32 | filename = sqlalchemy.Column(sqlalchemy.String) 33 | size = sqlalchemy.Column(sqlalchemy.Integer, default=None) 34 | media_type = sqlalchemy.Column(sqlalchemy.String) 35 | downloaded = sqlalchemy.Column(sqlalchemy.Integer, default=0) 36 | created_at = sqlalchemy.Column(sqlalchemy.DATETIME) 37 | 38 | return legacy_media_table 39 | 40 | def legacy_2(self, Base: Any): 41 | class legacy_media_table(Base): 42 | __tablename__ = "medias" 43 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 44 | media_id = sqlalchemy.Column(sqlalchemy.Integer, unique=True) 45 | post_id = sqlalchemy.Column(sqlalchemy.Integer, nullable=False) 46 | link = cast(str, sqlalchemy.Column(sqlalchemy.String)) 47 | directory = cast(str, sqlalchemy.Column(sqlalchemy.String)) 48 | filename = cast(str, sqlalchemy.Column(sqlalchemy.String)) 49 | size = cast(int, sqlalchemy.Column(sqlalchemy.Integer, default=None)) 50 | media_type = sqlalchemy.Column(sqlalchemy.String) 51 | preview = sqlalchemy.Column(sqlalchemy.Integer, default=0) 52 | linked = sqlalchemy.Column(sqlalchemy.String, default=None) 53 | downloaded = cast(bool, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 54 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 55 | 56 | return legacy_media_table 57 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/user_database.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, cast 2 | 3 |
import sqlalchemy 4 | from sqlalchemy.orm.decl_api import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | LegacyBase = declarative_base() 15 | 16 | 17 | class profiles_table(Base): 18 | __tablename__ = "profiles" 19 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 20 | user_id = cast(int, sqlalchemy.Column(sqlalchemy.Integer, nullable=False)) 21 | username = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False) 22 | 23 | 24 | class stories_table(ApiModel, Base): 25 | ApiModel.__tablename__ = "stories" 26 | 27 | 28 | class posts_table(ApiModel, Base): 29 | ApiModel.__tablename__ = "posts" 30 | 31 | 32 | class messages_table(ApiModel, Base): 33 | ApiModel.__tablename__ = "messages" 34 | user_id = cast(Optional[int], sqlalchemy.Column(sqlalchemy.Integer)) 35 | 36 | class api_legacy_table(ApiModel, LegacyBase): 37 | pass 38 | 39 | 40 | class products_table(ApiModel, Base): 41 | ApiModel.__tablename__ = "products" 42 | title = sqlalchemy.Column(sqlalchemy.String) 43 | 44 | 45 | class others_table(ApiModel, Base): 46 | ApiModel.__tablename__ = "others" 47 | 48 | 49 | # class comments_table(api_table,Base): 50 | # api_table.__tablename__ = "comments" 51 | 52 | 53 | class media_table(TemplateMediaModel, Base): 54 | class media_legacy_table(TemplateMediaModel().legacy_2(LegacyBase), LegacyBase): 55 | pass 56 | 57 | 58 | def table_picker(table_name: str, legacy: bool = False): 59 | match table_name: 60 | case "Stories" | "Highlights": 61 | table = stories_table 62 | case "Posts": 63 | table = posts_table 64 | case "Messages" | "Chats" | "MassMessages": 65 | table = messages_table if not legacy else messages_table().api_legacy_table 66 | case "Products": 67 | table = products_table 68 | case "Others": 69 | table = others_table 70 | case _: 71 | raise Exception(f'"{table_name}" is an invalid table name') 72 | return table 73 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/sqlite_database.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import ultima_scraper_api 6 | from alembic import command 7 | from alembic.config import Config 8 | from alembic.migration import MigrationContext 9 | from sqlalchemy import create_engine, func 10 | from sqlalchemy.orm import DeclarativeBase, scoped_session, sessionmaker 11 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models import ( 12 | user_database, 13 | ) 14 | 15 | if TYPE_CHECKING: 16 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 17 | ContentMetadata, 18 | ) 19 | 20 | user_types = ultima_scraper_api.user_types 21 | 22 | 23 | class DBCollection(object): 24 | def __init__(self) -> None: 25 | self.user_database = user_database 26 | 27 | def database_picker(self, database_name: str): 28 | match database_name: 29 | case "user_data": 30 | database = self.user_database 31 | case _: 32 | raise Exception(f'"{database_name}" is an invalid database name') 33 | return database 34 | 35 | 36 | class SqliteDatabase(DeclarativeBase): 37 | def __init__(self) 
-> None: 38 | self.name = Path() 39 | self.info = "" 40 | self.session_factory = sessionmaker() 41 | self.session = self.create_session() 42 | self.root_directory = Path(__file__).parent 43 | self.alembic_directory = Path() 44 | self.migration_directory = Path() 45 | 46 | def init_db(self, name: Path, legacy: bool = False): 47 | self.name: Path = name 48 | self.info = f"sqlite:///{name}" 49 | self.session_factory = sessionmaker(bind=self.create_engine(), autocommit=False) 50 | self.session = self.create_session() 51 | self.alembic_directory = Path(__file__).parent.joinpath( 52 | f"{'databases' if not legacy else 'legacy_databases'}", 53 | self.name.stem.lower(), 54 | "alembic", 55 | ) 56 | self.migration_directory = self.alembic_directory.parent.joinpath("migration") 57 | return self 58 | 59 | def create_engine(self): 60 | return create_engine( 61 | self.info, 62 | connect_args={}, 63 | ) 64 | 65 | def create_session(self): 66 | return scoped_session(self.session_factory) 67 | 68 | def execute(self, statement: Any): 69 | result = self.session.execute(statement) 70 | return result 71 | 72 | def generate_migration(self): 73 | if not self.session.bind: 74 | return 75 | conn = self.session.bind.engine.connect() 76 | context = MigrationContext.configure(conn) 77 | current_rev = context.get_current_revision() 78 | alembic_cfg = Config(self.migration_directory.joinpath("alembic.ini")) 79 | alembic_cfg.set_main_option( 80 | "script_location", 81 | self.migration_directory.joinpath("alembic").as_posix(), 82 | ) 83 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 84 | if not current_rev: 85 | _ggg = command.revision(alembic_cfg, autogenerate=True) 86 | else: 87 | _ggg = command.revision(alembic_cfg, autogenerate=True, head=current_rev) 88 | self.run_migrations() 89 | return True 90 | 91 | def run_migrations(self, legacy: bool = False) -> None: 92 | while True: 93 | try: 94 | migration_directory = ( 95 | self.alembic_directory.parent.joinpath("migration") 96 | if legacy 97 | else self.migration_directory 98 | ) 99 | 100 | alembic_cfg = Config(migration_directory.joinpath("alembic.ini")) 101 | alembic_cfg.set_main_option( 102 | "script_location", 103 | migration_directory.joinpath("alembic").as_posix(), 104 | ) 105 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 106 | command.upgrade(alembic_cfg, "head") 107 | break 108 | except Exception as e: 109 | print(e) 110 | pass 111 | 112 | def revert_migration(self): 113 | while True: 114 | try: 115 | alembic_cfg = Config(self.migration_directory.joinpath("alembic.ini")) 116 | alembic_cfg.set_main_option( 117 | "script_location", 118 | self.migration_directory.joinpath("alembic").as_posix(), 119 | ) 120 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 121 | command.downgrade(alembic_cfg, "-1") 122 | break 123 | except Exception as e: 124 | print(e) 125 | pass 126 | 127 | def import_metadata( 128 | self, datas: list["ContentMetadata"], api_type: str | None = None 129 | ): 130 | database_path = self.name 131 | database_path.parent.mkdir(parents=True, exist_ok=True) 132 | self.run_migrations() 133 | db_collection = DBCollection() 134 | database = db_collection.database_picker(database_path.stem) 135 | database_session = self.session 136 | for post in datas: 137 | if post.api_type: 138 | api_type = post.api_type 139 | api_table = database.table_picker(api_type) 140 | if not api_table: 141 | return 142 | post_id = post.content_id 143 | post_created_at_string = post.created_at 144 | date_object = None 145 | if post_created_at_string: 
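# created_at may arrive as an ISO-8601 string or in the legacy "%d-%m-%Y %H:%M:%S" format; try ISO first and fall back to strptime.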
146 | try: 147 | date_object = datetime.fromisoformat(post_created_at_string) 148 | pass 149 | except Exception as _e: 150 | date_object = datetime.strptime( 151 | post_created_at_string, "%d-%m-%Y %H:%M:%S" 152 | ) 153 | pass 154 | result = database_session.query(api_table) 155 | post_db = result.filter_by(post_id=post_id).first() 156 | if not post_db: 157 | post_db = api_table() 158 | else: 159 | pass 160 | if api_type == "Messages": 161 | post_db.user_id = post.user_id 162 | post_db.post_id = post_id 163 | post_db.text = post.text 164 | post_db.price = post.price 165 | post_db.paid = post.paid 166 | post_db.archived = post.archived 167 | if date_object: 168 | post_db.created_at = date_object 169 | database_session.add(post_db) 170 | for media in post.medias: 171 | if media.media_type == "Texts": 172 | continue 173 | media_created_at_string = media.created_at 174 | if not isinstance(media_created_at_string, datetime): 175 | if isinstance(media_created_at_string, int): 176 | date_object = datetime.fromtimestamp(media_created_at_string) 177 | else: 178 | try: 179 | date_object = datetime.fromisoformat( 180 | media_created_at_string 181 | ) 182 | except Exception as _e: 183 | date_object = datetime.strptime( 184 | media_created_at_string, "%d-%m-%Y %H:%M:%S" 185 | ) 186 | pass 187 | media_id = media.id 188 | result = database_session.query(database.media_table) 189 | media_db = result.filter_by(post_id=post_id, media_id=media_id).first() 190 | if not media_db: 191 | media_db = result.filter_by( 192 | filename=media.filename, created_at=date_object 193 | ).first() 194 | if not media_db: 195 | media_db = database.media_table() 196 | else: 197 | pass 198 | if ( 199 | post.__legacy__ 200 | and media_db.media_id != media.id 201 | and media_db.media_id 202 | ): 203 | media_id = media_db.media_id 204 | 205 | media_db.media_id = media_id 206 | media_db.post_id = post_id 207 | media_db.size = media.size if media_db.size is None else media_db.size 208 | media_db.link = media.urls[0] if media.urls else None 209 | media_db.preview = media.preview 210 | media_db.directory = ( 211 | media.directory.as_posix() if media.directory else None 212 | ) 213 | media_db.filename = media.filename 214 | media_db.api_type = api_type 215 | media_db.media_type = media.media_type 216 | media_db.linked = media.linked 217 | if date_object: 218 | media_db.created_at = date_object 219 | database_session.add(media_db) 220 | database_session.commit() 221 | database_session.close() 222 | return True 223 | 224 | def legacy_sqlite_updater( 225 | self, 226 | api_type: str, 227 | subscription: user_types, 228 | ): 229 | final_result: list[dict[str, Any]] = [] 230 | legacy_metadata_path = self.name 231 | if legacy_metadata_path.exists(): 232 | self.run_migrations(legacy=True) 233 | database_name = "user_data" 234 | database_session = self.session 235 | db_collection = DBCollection() 236 | database = db_collection.database_picker(database_name) 237 | if database: 238 | if api_type == "Messages": 239 | api_table_table = database.table_picker(api_type, True) 240 | else: 241 | api_table_table = database.table_picker(api_type) 242 | media_table_table = database.media_table.media_legacy_table 243 | if api_table_table: 244 | result = database_session.query(api_table_table).all() 245 | result2 = database_session.query(media_table_table).all() 246 | for item in result: 247 | for item2 in result2: 248 | if item.post_id != item2.post_id: 249 | continue 250 | item.medias.append(item2) 251 | item.user_id = subscription.id 252 |
final_result.append(item) 253 | database_session.close() 254 | return final_result 255 | 256 | def find_table(self, name: str): 257 | table = [x for x in self.metadata.sorted_tables if x.name == name] 258 | if table: 259 | return table[0] 260 | 261 | def get_count(self, q: Any): 262 | count_q = q.statement.with_only_columns(func.count()).order_by(None) 263 | count: int = q.session.execute(count_q).scalar() 264 | return count 265 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/database_manager.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.sqlite_database import ( 4 | SqliteDatabase, 5 | ) 6 | 7 | 8 | class DatabaseManager: 9 | def __init__(self) -> None: 10 | self.active_db: SqliteDatabase | None = None 11 | 12 | def get_sqlite_db(self, path: Path, legacy: bool = False): 13 | sqlite_db = SqliteDatabase().init_db(path, legacy) 14 | self.active_db = sqlite_db 15 | return sqlite_db 16 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/datascraper_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascraper_manager.py: -------------------------------------------------------------------------------- 1 | import ultima_scraper_api 2 | from ultima_scraper_api import SUPPORTED_SITES 3 | from ultima_scraper_api.apis.onlyfans import onlyfans 4 | from ultima_scraper_collection import datascraper_types 5 | from ultima_scraper_collection.config import UltimaScraperCollectionConfig 6 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 7 | FanslyDataScraper, 8 | ) 9 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 10 | OnlyFansDataScraper, 11 | ) 12 | from ultima_scraper_collection.managers.option_manager import OptionManager 13 | from ultima_scraper_collection.managers.server_manager import ServerManager 14 | 15 | 16 | class DataScraperManager: 17 | def __init__( 18 | self, server_manager: ServerManager, config: UltimaScraperCollectionConfig 19 | ) -> None: 20 | self.datascrapers: dict[str, datascraper_types] = {} 21 | self.server_manager: ServerManager = server_manager 22 | self.config = config 23 | for site_name in SUPPORTED_SITES: 24 | datascraper = self.add_datascraper( 25 | ultima_scraper_api.select_api(site_name, config), 26 | OptionManager(), 27 | self.server_manager, 28 | ) 29 | datascraper.filesystem_manager.activate_directory_manager( 30 | self.get_site_config(site_name) 31 | ) 32 | 33 | def get_site_config(self, name: str): 34 | return getattr(self.config.site_apis, name.lower()) 35 | 36 | def find_datascraper( 37 | self, 38 | site_name: str, 39 | ): 40 | return self.datascrapers.get(site_name.lower()) 41 | 42 | def select_datascraper( 43 | self, 44 | site_name: str, 45 | ): 46 | return self.datascrapers.get(site_name.lower()) 47 | 48 | def add_datascraper( 49 | self, 50 | api: ultima_scraper_api.api_types, 51 | option_manager: OptionManager, 52 | 
server_manager: ServerManager, 53 | ): 54 | site_settings = self.get_site_config(api.site_name) 55 | if isinstance(api, onlyfans.OnlyFansAPI): 56 | datascraper = OnlyFansDataScraper( 57 | api, option_manager, server_manager, site_settings 58 | ) 59 | else: 60 | datascraper = FanslyDataScraper( 61 | api, option_manager, server_manager, site_settings 62 | ) 63 | self.datascrapers[api.site_name.lower()] = datascraper 64 | return datascraper 65 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/datascraper_manager/datascrapers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/fansly.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import TYPE_CHECKING, Any 3 | 4 | from ultima_scraper_api.apis.fansly.fansly import FanslyAPI 5 | from ultima_scraper_renamer.reformat import ReformatManager 6 | 7 | from ultima_scraper_collection.config import Sites 8 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 9 | ApiExtractor, 10 | ContentMetadata, 11 | ) 12 | from ultima_scraper_collection.managers.option_manager import OptionManager 13 | from ultima_scraper_collection.managers.server_manager import ServerManager 14 | from ultima_scraper_collection.modules.module_streamliner import StreamlinedDatascraper 15 | 16 | if TYPE_CHECKING: 17 | from ultima_scraper_api.apis.fansly.classes.auth_model import AuthModel 18 | from ultima_scraper_api.apis.fansly.classes.message_model import create_message 19 | from ultima_scraper_api.apis.fansly.classes.post_model import create_post 20 | from ultima_scraper_api.apis.fansly.classes.story_model import create_story 21 | from ultima_scraper_api.apis.fansly.classes.user_model import create_user 22 | 23 | 24 | class FanslyDataScraper(StreamlinedDatascraper): 25 | def __init__( 26 | self, 27 | api: FanslyAPI, 28 | option_manager: OptionManager, 29 | server_manager: ServerManager, 30 | site_config: Sites.FanslyAPIConfig, 31 | ) -> None: 32 | self.api = api 33 | self.option_manager = option_manager 34 | self.site_config = site_config 35 | StreamlinedDatascraper.__init__(self, self, server_manager) 36 | 37 | # Scrapes the API for content 38 | async def media_scraper( 39 | self, 40 | content_result: "create_story | create_post | create_message", 41 | subscription: "create_user", 42 | api_type: str, 43 | ) -> dict[str, Any]: 44 | authed = subscription.get_authed() 45 | site_config = self.site_config 46 | new_set: dict[str, Any] = {"content": []} 47 | directories: list[Path] = [] 48 | if api_type == "Stories": 49 | pass 50 | if api_type == "Archived": 51 | pass 52 | if api_type == "Posts": 53 | pass 54 | if api_type == "Messages": 55 | pass 56 | 57 | content_metadata = ContentMetadata(content_result.id, api_type) 58 | await content_metadata.resolve_extractor(ApiExtractor(content_result)) 59 | for asset in content_metadata.medias: 60 | if asset.urls: 61 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 62 | reformat_item = reformat_manager.prepare_reformat(asset) 63 | file_directory = 
reformat_item.reformat( 64 | site_config.download_setup.directory_format 65 | ) 66 | reformat_item.directory = file_directory 67 | file_path = reformat_item.reformat( 68 | site_config.download_setup.filename_format 69 | ) 70 | asset.directory = file_directory 71 | asset.filename = file_path.name 72 | 73 | if file_directory not in directories: 74 | directories.append(file_directory) 75 | new_set["content"].append(content_metadata) 76 | new_set["directories"] = directories 77 | return new_set 78 | 79 | async def get_all_stories(self, subscription: "create_user"): 80 | """ 81 | get_all_stories(subscription: create_user) 82 | 83 | This function returns a list of all stories and archived stories from the given subscription. 84 | 85 | Arguments: 86 | subscription (create_user): An instance of the create_user class. 87 | 88 | Returns: 89 | list[create_story]: A list containing all stories and archived stories from the subscription. 90 | """ 91 | master_set: list["create_story"] = [] 92 | master_set.extend(await subscription.get_stories()) 93 | # master_set.extend(await subscription.get_archived_stories()) 94 | return master_set 95 | 96 | async def get_all_posts(self, subscription: "create_user"): 97 | temp_master_set = await subscription.get_posts() 98 | collections = await subscription.get_collections() 99 | for collection in collections: 100 | temp_master_set.append( 101 | await subscription.get_collection_content(collection) 102 | ) 103 | return temp_master_set 104 | 105 | async def get_all_subscriptions( 106 | self, 107 | authed: "AuthModel", 108 | identifiers: list[int | str] = [], 109 | refresh: bool = True, 110 | ): 111 | """ 112 | get_all_subscriptions(authed: AuthModel, identifiers: list[int | str] = [], refresh: bool = True) 113 | 114 | This function returns a list of all subscriptions, including both subscriptions and followings, 115 | from the given authenticated user. 116 | 117 | Arguments: 118 | authed (AuthModel): An instance of the AuthModel class. 119 | identifiers (list[int | str], optional): A list of identifiers (username or id) for the subscriptions. Defaults to an empty list. 120 | refresh (bool, optional): A flag indicating whether to refresh the list of subscriptions. Defaults to True. 121 | 122 | Returns: 123 | list[create_subscription]: A list of all subscriptions, including both subscriptions and followings, from the authenticated user. 
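The returned list is sorted by subscription end date (ends_at), soonest-expiring first.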
124 | """ 125 | authed.followed_users = await authed.get_followings(identifiers=identifiers) 126 | subscriptions = await authed.get_subscriptions( 127 | identifiers=identifiers, refresh=refresh, sub_type="active" 128 | ) 129 | subscriptions.sort(key=lambda x: x.ends_at) 130 | return subscriptions 131 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/onlyfans.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from datetime import datetime, timezone 3 | from pathlib import Path 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from sqlalchemy import select 7 | from sqlalchemy.orm import joinedload 8 | from ultima_scraper_api.apis.onlyfans.classes.mass_message_model import MassMessageModel 9 | from ultima_scraper_api.apis.onlyfans.onlyfans import OnlyFansAPI 10 | from ultima_scraper_collection.config import Sites 11 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 12 | ApiExtractor, 13 | ContentMetadata, 14 | ) 15 | from ultima_scraper_collection.managers.option_manager import OptionManager 16 | from ultima_scraper_collection.managers.server_manager import ServerManager 17 | from ultima_scraper_collection.modules.module_streamliner import StreamlinedDatascraper 18 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import PostModel 19 | from ultima_scraper_renamer.reformat import ReformatManager 20 | 21 | if TYPE_CHECKING: 22 | from ultima_scraper_api.apis.onlyfans.classes.auth_model import OnlyFansAuthModel 23 | from ultima_scraper_api.apis.onlyfans.classes.hightlight_model import ( 24 | create_highlight, 25 | ) 26 | from ultima_scraper_api.apis.onlyfans.classes.message_model import create_message 27 | from ultima_scraper_api.apis.onlyfans.classes.post_model import create_post 28 | from ultima_scraper_api.apis.onlyfans.classes.story_model import create_story 29 | from ultima_scraper_api.apis.onlyfans.classes.user_model import create_user 30 | 31 | 32 | class OnlyFansDataScraper(StreamlinedDatascraper): 33 | def __init__( 34 | self, 35 | api: OnlyFansAPI, 36 | option_manager: OptionManager, 37 | server_manager: ServerManager, 38 | site_config: Sites.OnlyFansAPIConfig, 39 | ) -> None: 40 | self.api = api 41 | self.option_manager = option_manager 42 | self.site_config = site_config 43 | StreamlinedDatascraper.__init__(self, self, server_manager) 44 | 45 | # Scrapes the API for content 46 | async def media_scraper( 47 | self, 48 | content_result: "create_story | create_post | create_message|MassMessageModel", 49 | subscription: "create_user", 50 | api_type: str, 51 | ) -> dict[str, Any]: 52 | api_type = self.api.convert_api_type_to_key(content_result) 53 | authed = subscription.get_authed() 54 | site_config = self.site_config 55 | new_set: dict[str, Any] = {"content": []} 56 | directories: list[Path] = [] 57 | if api_type == "Stories": 58 | pass 59 | if api_type == "Posts": 60 | pass 61 | if api_type == "Messages": 62 | pass 63 | 64 | content_metadata = ContentMetadata( 65 | content_result.id, api_type, self.resolve_content_manager(subscription) 66 | ) 67 | 68 | await content_metadata.resolve_extractor(ApiExtractor(content_result)) 69 | for asset in content_metadata.medias: 70 | if asset.urls: 71 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 72 | reformat_item = reformat_manager.prepare_reformat(asset) 73 | if reformat_item.api_type == "Messages": 74 | if ( 75 
| content_metadata.queue_id 76 | and content_metadata.__soft__.is_mass_message() 77 | ): 78 | reformat_item.api_type = "MassMessages" 79 | file_directory = reformat_item.reformat( 80 | site_config.download_setup.directory_format 81 | ) 82 | reformat_item.directory = file_directory 83 | file_path = reformat_item.reformat( 84 | site_config.download_setup.filename_format 85 | ) 86 | asset.directory = file_directory 87 | asset.filename = file_path.name 88 | 89 | if file_directory not in directories: 90 | directories.append(file_directory) 91 | new_set["content"].append(content_metadata) 92 | new_set["directories"] = directories 93 | return new_set 94 | 95 | async def get_all_stories(self, subscription: "create_user"): 96 | """ 97 | get_all_stories(subscription: create_user) 98 | 99 | This function returns a list of all stories and highlights from the given subscription. 100 | 101 | Arguments: 102 | subscription (create_user): An instance of the create_user class. 103 | 104 | Returns: 105 | list[create_highlight | create_story]: A list containing all stories and highlights from the subscription. 106 | """ 107 | master_set: list[create_highlight | create_story] = [] 108 | master_set.extend(await subscription.get_stories()) 109 | master_set.extend(await subscription.get_archived_stories()) 110 | highlights = await subscription.get_highlights() 111 | valid_highlights: list[create_highlight | create_story] = [] 112 | for highlight in highlights: 113 | resolved_highlight = await subscription.get_highlights( 114 | hightlight_id=highlight.id 115 | ) 116 | valid_highlights.extend(resolved_highlight) 117 | master_set.extend(valid_highlights) 118 | return master_set 119 | 120 | async def get_all_posts(self, performer: "create_user") -> list["create_post"]: 121 | async with self.get_archive_db_api().create_site_api( 122 | performer.get_api().site_name 123 | ) as db_site_api: 124 | after_date = None 125 | # db_performer = await db_site_api.get_user(performer.id) 126 | # await db_performer.awaitable_attrs._posts 127 | # result = await db_performer.last_subscription_downloaded_at() 128 | # if result: 129 | # after_date = result.downloaded_at 130 | 131 | posts = await performer.get_posts(after_date=after_date) 132 | archived_posts = await performer.get_posts(label="archived") 133 | private_archived_posts = await performer.get_posts(label="private_archived") 134 | 135 | session = db_site_api.get_session() 136 | posts_with_comments = ( 137 | select(PostModel) 138 | .options(joinedload(PostModel.comments)) 139 | .filter(PostModel.comments.any()) 140 | .where(PostModel.user_id == performer.id) 141 | .order_by(PostModel.created_at.desc()) 142 | ) 143 | results = await session.scalars(posts_with_comments) 144 | db_posts = results.unique().all() 145 | threshold_date = ( 146 | db_posts[0].created_at 147 | if db_posts 148 | else datetime.min.replace(tzinfo=timezone.utc) 149 | ) 150 | tasks = [ 151 | x.get_comments() 152 | for x in performer.scrape_manager.scraped.Posts.values() 153 | if x.created_at > threshold_date 154 | ] 155 | await asyncio.gather(*tasks) 156 | return posts + archived_posts + private_archived_posts 157 | 158 | async def get_all_subscriptions( 159 | self, 160 | authed: "OnlyFansAuthModel", 161 | identifiers: list[int | str] = [], 162 | refresh: bool = True, 163 | ): 164 | """ 165 | get_all_subscriptions(authed: AuthModel, identifiers: list[int | str] = [], refresh: bool = True) 166 | 167 | This function returns a list of all subscriptions from the given authenticated user. 
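        (Unlike the Fansly implementation, followings are not fetched here; only
        active subscriptions are returned, sorted by their expiry date.)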
168 | 169 | Arguments: 170 | authed (AuthModel): An instance of the AuthModel class. 171 | identifiers (list[int | str], optional): A list of identifiers (username or id) for the subscriptions. Defaults to an empty list. 172 | refresh (bool, optional): A flag indicating whether to refresh the list of subscriptions. Defaults to True. 173 | 174 | Returns: 175 | list[create_subscription]: A list of all subscriptions, sorted by expiredAt, from the authenticated user. 176 | """ 177 | subscriptions = await authed.get_subscriptions( 178 | identifiers=identifiers, refresh=refresh, sub_type="active" 179 | ) 180 | subscriptions.sort(key=lambda x: x.subscribed_by_expire_date) 181 | return subscriptions 182 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/download_manager.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import copy 3 | from pathlib import Path 4 | from urllib.parse import urlparse 5 | 6 | import ffmpeg 7 | from aiohttp import ClientResponse 8 | from alive_progress import alive_bar 9 | from ultima_scraper_api import auth_types 10 | from ultima_scraper_api.apis.onlyfans.classes.mass_message_model import MassMessageModel 11 | from ultima_scraper_api.helpers import main_helper 12 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 13 | MediaModel, 14 | MessageModel, 15 | ) 16 | from ultima_scraper_renamer.reformat import ReformatManager 17 | 18 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 19 | TemplateMediaModel, 20 | ) 21 | from ultima_scraper_collection.managers.filesystem_manager import FilesystemManager 22 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 23 | MediaMetadata, 24 | ) 25 | 26 | 27 | class DownloadManager: 28 | def __init__( 29 | self, 30 | authed: auth_types, 31 | filesystem_manager: FilesystemManager, 32 | media_set: set[MediaMetadata] = set(), 33 | reformat: bool = True, 34 | ) -> None: 35 | self.authed = authed 36 | self.filesystem_manager = filesystem_manager 37 | self.auth_session = self.authed.auth_session 38 | self.requester = self.authed.get_requester() 39 | self.content_list: set[MediaMetadata] = media_set 40 | self.errors: list[TemplateMediaModel] = [] 41 | self.reformat = reformat 42 | self.reformat_manager = ReformatManager(self.authed, filesystem_manager) 43 | self.bar = None 44 | 45 | async def bulk_download(self): 46 | final_list = [self.download(media_item) for media_item in self.content_list] 47 | if final_list: 48 | with alive_bar(len(self.content_list)) as bar: 49 | self.bar = bar 50 | _result = await asyncio.gather(*final_list, return_exceptions=True) 51 | 52 | async def drm_download(self, download_item: MediaMetadata): 53 | content_metadata = download_item.__content_metadata__ 54 | authed = self.authed 55 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 56 | assert reformat_manager.filesystem_manager.directory_manager 57 | site_config = reformat_manager.filesystem_manager.directory_manager.site_config 58 | drm = authed.drm 59 | media_item = download_item.__raw__ 60 | assert drm and media_item 61 | mpd = await drm.get_mpd(media_item) 62 | pssh = await drm.get_pssh(mpd) 63 | responses: list[ClientResponse] = [] 64 | 65 | if pssh: 66 | if content_metadata: 67 | soft_data = content_metadata.__soft__ 68 | raw_data = soft_data.__raw__.copy() 69 | if ( 70 | isinstance(soft_data, 
MassMessageModel) 71 | and soft_data 72 | and soft_data.author.is_authed_user() 73 | ): 74 | raw_data["responseType"] = "" 75 | else: 76 | raw_data = {"responseType": ""} 77 | license = await drm.get_license(raw_data, media_item, pssh) 78 | keys = await drm.get_keys(license) 79 | content_key = keys[-1] 80 | key = f"{content_key.kid.hex}:{content_key.key.hex()}" 81 | download_item.key = key 82 | video_url, audio_url = [ 83 | drm.get_video_url(mpd, media_item), 84 | drm.get_audio_url(mpd, media_item), 85 | ] 86 | download_item.urls = [video_url] 87 | reformat_item = reformat_manager.prepare_reformat(download_item) 88 | file_directory = reformat_item.reformat( 89 | site_config.download_setup.directory_format 90 | ) 91 | reformat_item.directory = file_directory 92 | file_path = reformat_item.reformat( 93 | site_config.download_setup.filename_format 94 | ) 95 | download_item.directory = file_directory 96 | download_item.filename = file_path.name 97 | for media_url in video_url, audio_url: 98 | drm_download_item = copy.copy(download_item) 99 | drm_download_item = reformat_manager.drm_format( 100 | media_url, drm_download_item 101 | ) 102 | 103 | signature_str = await drm.get_signature(media_item) 104 | response = await authed.auth_session.request( 105 | media_url, premade_settings="", custom_cookies=signature_str 106 | ) 107 | responses.append(response) 108 | return responses 109 | 110 | async def download(self, download_item: MediaMetadata): 111 | if not download_item.urls: 112 | return 113 | attempt = 0 114 | db_media = download_item.__db_media__ 115 | assert db_media 116 | await db_media.awaitable_attrs.content_media_assos 117 | content = download_item.get_content_metadata() 118 | if content: 119 | db_content = content.__db_content__ 120 | assert db_content 121 | if isinstance(db_content, MessageModel): 122 | if db_content.queue_id: 123 | try: 124 | db_filepath = db_media.find_filepath( 125 | (db_content.queue_id, "MassMessages") 126 | ) 127 | except Exception as _e: 128 | pass 129 | pass 130 | else: 131 | db_filepath = db_media.find_filepath() 132 | else: 133 | db_filepath = db_media.find_filepath() 134 | else: 135 | db_filepath = db_media.find_filepath() 136 | pass 137 | matches = ["us", "uk", "ca", "ca2", "de"] 138 | p_url = urlparse(download_item.urls[0]) 139 | assert p_url.hostname 140 | subdomain = p_url.hostname.split(".")[0] 141 | if any(subdomain in nm for nm in matches): 142 | return 143 | 144 | authed = self.authed 145 | authed_drm = authed.drm 146 | 147 | async with self.auth_session.semaphore: 148 | while attempt < self.auth_session.get_session_manager().max_attempts + 1: 149 | try: 150 | if download_item.drm: 151 | if not authed_drm: 152 | break 153 | responses = await self.drm_download(download_item) 154 | else: 155 | responses = [ 156 | await self.requester.request(download_item.urls[0]) 157 | ] 158 | if all(response.status != 200 for response in responses): 159 | attempt += 1 160 | continue 161 | if not download_item.directory: 162 | raise Exception( 163 | f"{download_item.id} has no directory\n {download_item}" 164 | ) 165 | decrypted_media_paths: list[Path] = [] 166 | final_size = 0 167 | error = None 168 | for response in responses: 169 | if download_item.drm and await self.drm_check_downloaded( 170 | download_item 171 | ): 172 | continue 173 | download_path, error = await self.writer( 174 | response, download_item, encrypted=bool(download_item.key) 175 | ) 176 | if error: 177 | attempt += 1 178 | break 179 | if authed_drm and download_item.drm and download_path: 180 | 
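                            # Each DRM response (the video track, then the audio
                            # track) is written to disk still encrypted; here it is
                            # decrypted with the key resolved in drm_download(), and
                            # the decrypted parts are collected so format_media()
                            # can mux them back into a single file further below.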
output_filepath = authed_drm.decrypt_file( 181 | download_path, download_item.key 182 | ) 183 | if not output_filepath: 184 | raise Exception("No output_filepath") 185 | decrypted_media_paths.append(output_filepath) 186 | if response.content_length: 187 | final_size += response.content_length 188 | if error == 1: 189 | # Server Disconnect Error 190 | continue 191 | elif error == 2: 192 | # Resource Not Found Error 193 | break 194 | assert download_item.filename 195 | download_path = download_item.directory.joinpath( 196 | download_item.filename 197 | ) 198 | if authed_drm and download_item.drm: 199 | formatted = self.format_media( 200 | download_path, 201 | decrypted_media_paths, 202 | ) 203 | if not formatted: 204 | pass 205 | final_size = download_path.stat().st_size 206 | timestamp = db_media.created_at.timestamp() 207 | await main_helper.format_file( 208 | download_path, timestamp, self.reformat 209 | ) 210 | if db_media and db_filepath: 211 | if not db_filepath.preview: 212 | db_media.size = download_item.size = final_size 213 | else: 214 | if final_size > db_media.size: 215 | db_media.size = final_size 216 | db_filepath.downloaded = True 217 | break 218 | except asyncio.TimeoutError as _e: 219 | continue 220 | except Exception as _e: 221 | print(_e) 222 | self.bar() 223 | 224 | async def writer( 225 | self, 226 | result: ClientResponse, 227 | download_item: MediaMetadata, 228 | encrypted: bool = True, 229 | ): 230 | async with result as response: 231 | if download_item.drm and encrypted: 232 | download_item = copy.copy(download_item) 233 | download_item = self.reformat_manager.drm_format( 234 | response.url.human_repr(), download_item 235 | ) 236 | assert download_item.directory and download_item.filename 237 | download_path = Path(download_item.directory, download_item.filename) 238 | db_media = copy.copy(download_item.__db_media__) 239 | db_media.directory = download_item.directory 240 | db_media.filename = download_item.filename 241 | download = await self.check(db_media, response) 242 | if not download: 243 | return download_path, None 244 | failed = await self.filesystem_manager.write_data(response, download_path) 245 | return download_path, failed 246 | 247 | async def drm_check_downloaded(self, download_item: MediaMetadata): 248 | download_path = download_item.get_filepath() 249 | if download_path.exists(): 250 | if download_path.stat().st_size and download_item.__db_media__.size: 251 | return True 252 | return False 253 | 254 | async def check(self, download_item: MediaModel, response: ClientResponse): 255 | # Checks if we should download item or not // True | False 256 | filepath = Path(download_item.directory, download_item.filename) 257 | response_status = False 258 | if response.status == 200: 259 | response_status = True 260 | if response.content_length: 261 | download_item.size = response.content_length 262 | 263 | if filepath.exists(): 264 | try: 265 | if filepath.stat().st_size == response.content_length: 266 | return False 267 | else: 268 | return True 269 | except Exception as _e: 270 | pass 271 | else: 272 | if response_status: 273 | # Can produce false positives due to the same reason below 274 | return True 275 | else: 276 | # Reached this point because it probably exists in the folder but under a different content category 277 | pass 278 | 279 | def format_media(self, output_filepath: Path, decrypted_media_paths: list[Path]): 280 | # If you have decrypted video and audio to merge 281 | if len(decrypted_media_paths) > 1: 282 | dec_video_path, dec_audio_path = 
decrypted_media_paths 283 | video_input = ffmpeg.input(dec_video_path) # type:ignore 284 | audio_input = ffmpeg.input(dec_audio_path) # type:ignore 285 | try: 286 | _ffmpeg_output = ffmpeg.output( # type:ignore 287 | video_input, # type:ignore 288 | audio_input, # type:ignore 289 | output_filepath.as_posix(), 290 | vcodec="copy", 291 | acodec="copy", 292 | ).run(capture_stdout=True, capture_stderr=True, overwrite_output=True) 293 | return True 294 | except ffmpeg.Error as _e: 295 | return False 296 | return True 297 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/filesystem_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import copy 4 | import hashlib 5 | import os 6 | import shutil 7 | from pathlib import Path 8 | from typing import TYPE_CHECKING, Any, Generator, Literal 9 | 10 | import ultima_scraper_api 11 | from aiohttp.client_reqrep import ClientResponse 12 | from ultima_scraper_api.helpers.main_helper import open_partial 13 | from ultima_scraper_api.managers.session_manager import EXCEPTION_TEMPLATE 14 | from ultima_scraper_collection.helpers import main_helper as usc_helper 15 | from ultima_scraper_renamer.reformat import ( 16 | FormatAttributes, 17 | ReformatItem, 18 | ReformatManager, 19 | ) 20 | 21 | if TYPE_CHECKING: 22 | api_types = ultima_scraper_api.api_types 23 | user_types = ultima_scraper_api.user_types 24 | from ultima_scraper_collection import datascraper_types 25 | from ultima_scraper_collection.config import site_config_types 26 | 27 | 28 | class FilesystemManager: 29 | def __init__(self) -> None: 30 | self.user_data_directory = Path("__user_data__") 31 | self.trash_directory = self.user_data_directory.joinpath("trash") 32 | self.profiles_directory = self.user_data_directory.joinpath("profiles") 33 | self.devices_directory = self.user_data_directory.joinpath("drm_device") 34 | self.settings_directory = Path("__user_data__") 35 | self.ignore_files = ["desktop.ini", ".DS_Store", ".DS_store", "@eaDir"] 36 | self.directory_manager: DirectoryManager | None = None 37 | self.directory_manager_users: dict[int, DirectoryManager] = {} 38 | self.file_manager_users: dict[int, FileManager] = {} 39 | 40 | def __iter__(self): 41 | for each in self.__dict__.values(): 42 | yield each 43 | 44 | def check(self): 45 | for directory in self: 46 | if isinstance(directory, Path): 47 | directory.mkdir(exist_ok=True) 48 | 49 | def move(self, src: Path, trg: Path): 50 | shutil.move(src, trg) 51 | 52 | def remove_mandatory_files( 53 | self, files: list[Path] | Generator[Path, None, None], keep: list[str] = [] 54 | ): 55 | folders = [x for x in files if x.name not in self.ignore_files] 56 | if keep: 57 | folders = [x for x in files if x.name in keep] 58 | return folders 59 | 60 | def get_directory_manager(self, user_id: int): 61 | return self.directory_manager_users[user_id] 62 | 63 | def get_file_manager(self, user_id: int): 64 | return self.file_manager_users[user_id] 65 | 66 | def activate_directory_manager(self, site_config: site_config_types): 67 | root_metadata_directory = usc_helper.check_space( 68 | site_config.metadata_setup.directories 69 | ) 70 | root_download_directory = usc_helper.check_space( 71 | site_config.download_setup.directories 72 | ) 73 | self.directory_manager = DirectoryManager( 74 | site_config, 75 | root_metadata_directory, 76 | root_download_directory, 77 | ) 78 | 79 | def trash(self): 80 | pass 81 | 82 | 
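    # The writer below streams each response into a partial file created by
    # open_partial() and promotes it with os.replace() only once the body has
    # been fully consumed, so an interrupted download never leaves a truncated
    # file at the final path. A minimal standalone sketch of that pattern
    # (standard library only; the names are illustrative, not part of this
    # module):
    #
    #     import os
    #     from pathlib import Path
    #     from typing import Iterable
    #
    #     def atomic_write(path: Path, chunks: Iterable[bytes]) -> None:
    #         partial = path.with_suffix(path.suffix + ".part")
    #         with partial.open("wb") as f:
    #             for chunk in chunks:
    #                 f.write(chunk)
    #         os.replace(partial, path)  # atomic within a single filesystem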
async def write_data( 83 | self, response: ClientResponse, download_path: Path, callback: Any = None 84 | ): 85 | status_code = None 86 | if response.status == 200: 87 | total_length = 0 88 | os.makedirs(os.path.dirname(download_path), exist_ok=True) 89 | with open_partial(download_path) as f: 90 | partial_path = f.name 91 | try: 92 | async for data in response.content.iter_chunked(4096): 93 | f.write(data) 94 | length = len(data) 95 | total_length += length 96 | if callback: 97 | callback(length) 98 | except EXCEPTION_TEMPLATE as _e: 99 | status_code = 1 100 | except Exception as _e: 101 | raise Exception(f"Unknown Error: {_e}") 102 | except: 103 | os.unlink(partial_path) 104 | raise 105 | else: 106 | if status_code: 107 | os.unlink(partial_path) 108 | else: 109 | try: 110 | os.replace(partial_path, download_path) 111 | except OSError: 112 | pass 113 | else: 114 | if response.content_length: 115 | pass 116 | # progress_bar.update_total_size(-response.content_length) 117 | status_code = 2 118 | return status_code 119 | 120 | async def create_option( 121 | self, 122 | datascraper: datascraper_types, 123 | username: str, 124 | directory: Path, 125 | format_key: str, 126 | ): 127 | api = datascraper.api 128 | option = { 129 | "site_name": api.site_name, 130 | "profile_username": username, 131 | "model_username": username, 132 | "directory": directory, 133 | } 134 | reformat_item_fd = ReformatItem(option) 135 | assert self.directory_manager 136 | f_d_p = reformat_item_fd.remove_non_unique(self.directory_manager, format_key) 137 | return f_d_p 138 | 139 | async def create_directory_manager( 140 | self, site_config: site_config_types, user: user_types 141 | ): 142 | if self.directory_manager: 143 | final_download_directory = await self.discover_main_directory(user) 144 | final_root_download_directory = ( 145 | self.directory_manager.root_download_directory 146 | ) 147 | for directory in site_config.download_setup.directories: 148 | assert directory.path 149 | if directory.path.as_posix() in final_download_directory.as_posix(): 150 | final_root_download_directory = directory.path 151 | break 152 | directory_manager = DirectoryManager( 153 | site_config, 154 | self.directory_manager.root_metadata_directory, 155 | final_root_download_directory, 156 | ) 157 | self.directory_manager_users[user.id] = directory_manager 158 | self.file_manager_users[user.id] = FileManager(directory_manager) 159 | return directory_manager 160 | 161 | async def discover_main_metadata_directory(self, subscription: user_types): 162 | usernames = subscription.get_usernames(ignore_id=False) 163 | valid_usernames = subscription.get_usernames(ignore_id=True) 164 | authed = subscription.get_authed() 165 | reformat_manager = ReformatManager(authed, self) 166 | directory_manager = self.directory_manager 167 | site_config = directory_manager.site_config 168 | final_store_directory = None 169 | for username in usernames: 170 | for store_directory in [ 171 | x.path for x in site_config.metadata_setup.directories if x.path 172 | ]: 173 | download_directory_reformat_item = ( 174 | reformat_manager.prepare_user_reformat( 175 | subscription, store_directory, username=username 176 | ) 177 | ) 178 | formatted_download_directory = ( 179 | download_directory_reformat_item.reformat( 180 | site_config.metadata_setup.directory_format 181 | ) 182 | ) 183 | final_store_directory = formatted_download_directory 184 | if final_store_directory.exists(): 185 | if username == f"u{subscription.id}": 186 | if valid_usernames: 187 | 
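                        # The directory on disk was created under the id-based
                        # placeholder username ("u<id>"). Now that a real username
                        # is known, rebuild the path with the newest valid username
                        # and rename the old directory to it.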
download_directory_reformat_item = ( 188 | reformat_manager.prepare_user_reformat( 189 | subscription, 190 | store_directory, 191 | username=valid_usernames[-1], 192 | ) 193 | ) 194 | formatted_download_directory = ( 195 | download_directory_reformat_item.reformat( 196 | site_config.metadata_setup.directory_format 197 | ) 198 | ) 199 | if not formatted_download_directory.exists(): 200 | formatted_download_directory.mkdir( 201 | exist_ok=True, parents=True 202 | ) 203 | final_store_directory.rename( 204 | formatted_download_directory 205 | ) 206 | final_store_directory = formatted_download_directory 207 | else: 208 | final_store_directory = formatted_download_directory 209 | return final_store_directory 210 | else: 211 | return final_store_directory 212 | return final_store_directory 213 | 214 | async def discover_main_directory(self, subscription: user_types): 215 | usernames = subscription.get_usernames(ignore_id=False) 216 | if f"u{subscription.id}" not in usernames: 217 | usernames.append(f"u{subscription.id}") 218 | valid_usernames = subscription.get_usernames(ignore_id=True) 219 | authed = subscription.get_authed() 220 | reformat_manager = ReformatManager(authed, self) 221 | directory_manager = self.directory_manager 222 | assert directory_manager 223 | site_config = directory_manager.site_config 224 | store_directories = [ 225 | x.path for x in site_config.download_setup.directories if x.path 226 | ] 227 | 228 | for username in usernames: 229 | for store_directory in store_directories: 230 | download_directory_reformat_item = ( 231 | reformat_manager.prepare_user_reformat( 232 | subscription, store_directory, username=username 233 | ) 234 | ) 235 | formatted_download_directory = ( 236 | download_directory_reformat_item.remove_non_unique( 237 | directory_manager, "file_directory_format" 238 | ) 239 | ) 240 | 241 | if formatted_download_directory.exists(): 242 | if username == f"u{subscription.id}" and valid_usernames: 243 | download_directory_reformat_item = ( 244 | reformat_manager.prepare_user_reformat( 245 | subscription, 246 | store_directory, 247 | username=valid_usernames[-1], 248 | ) 249 | ) 250 | new_formatted_download_directory = ( 251 | download_directory_reformat_item.remove_non_unique( 252 | directory_manager, "file_directory_format" 253 | ) 254 | ) 255 | if not new_formatted_download_directory.exists(): 256 | # formatted_download_directory.mkdir( 257 | # exist_ok=True, parents=True 258 | # ) 259 | formatted_download_directory.rename( 260 | new_formatted_download_directory 261 | ) 262 | formatted_download_directory = ( 263 | new_formatted_download_directory 264 | ) 265 | return formatted_download_directory 266 | return formatted_download_directory 267 | 268 | download_directory_reformat_item = reformat_manager.prepare_user_reformat( 269 | subscription, directory_manager.root_download_directory, username=username 270 | ) 271 | formatted_download_directory = ( 272 | download_directory_reformat_item.remove_non_unique( 273 | directory_manager, "file_directory_format" 274 | ) 275 | ) 276 | return formatted_download_directory 277 | 278 | async def discover_alternative_directories(self, subscription: user_types): 279 | usernames = subscription.get_usernames(ignore_id=False) 280 | authed = subscription.get_authed() 281 | reformat_manager = ReformatManager(authed, self) 282 | directory_manager = self.get_directory_manager(subscription.id) 283 | site_config = directory_manager.site_config 284 | for username in usernames: 285 | for alt_download_directory in [ 286 | x.path for x in 
site_config.download_setup.directories if x.path 287 | ]: 288 | alt_download_directory_reformat_item = ( 289 | reformat_manager.prepare_user_reformat( 290 | subscription, alt_download_directory, username=username 291 | ) 292 | ) 293 | formatted_alt_download_directory = ( 294 | alt_download_directory_reformat_item.remove_non_unique( 295 | directory_manager, "file_directory_format" 296 | ) 297 | ) 298 | if ( 299 | formatted_alt_download_directory 300 | == directory_manager.user.download_directory 301 | ): 302 | continue 303 | if formatted_alt_download_directory.exists(): 304 | directory_manager.user.alt_download_directories.append( 305 | formatted_alt_download_directory 306 | ) 307 | return directory_manager.user.alt_download_directories 308 | 309 | async def format_directories(self, performer: user_types) -> DirectoryManager: 310 | authed = performer.get_authed() 311 | directory_manager = self.get_directory_manager(performer.id) 312 | file_manager = self.get_file_manager(performer.id) 313 | 314 | final_metadata_directory = await self.discover_main_metadata_directory( 315 | performer 316 | ) 317 | directory_manager.user.metadata_directory = final_metadata_directory 318 | 319 | final_download_directory = await self.discover_main_directory(performer) 320 | directory_manager.user.download_directory = final_download_directory 321 | 322 | api = authed.api 323 | performer_username = performer.get_usernames(ignore_id=True)[-1] 324 | site_name = authed.api.site_name 325 | alt_directories = await self.discover_alternative_directories(performer) 326 | await file_manager.set_default_files() 327 | _metadata_filepaths = await file_manager.find_metadata_files(legacy_files=False) 328 | # for metadata_filepath in metadata_filepaths: 329 | # if file_manager.directory_manager.user.metadata_directory.as_posix() in metadata_filepath.parent.as_posix(): 330 | # continue 331 | # new_filepath = file_manager.directory_manager.user.metadata_directory.joinpath(metadata_filepath.name) 332 | # if new_filepath.exists(): 333 | # new_filepath = usc_helper.find_unused_filename( 334 | # new_filepath 335 | # ) 336 | # if new_filepath.exists(): 337 | # breakpoint() 338 | # file_manager.rename_path(metadata_filepath, new_filepath) 339 | # pass 340 | # alt_files = await usc_helper.walk( 341 | # file_manager.directory_manager.user.download_directory 342 | # ) 343 | # if not alt_files: 344 | # shutil.rmtree(file_manager.directory_manager.user.download_directory) 345 | await file_manager.merge_alternative_directories(alt_directories) 346 | user_metadata_directory = directory_manager.user.metadata_directory 347 | assert user_metadata_directory 348 | _user_download_directory = directory_manager.user.download_directory 349 | legacy_metadata_directory = user_metadata_directory 350 | directory_manager.user.legacy_metadata_directories.append( 351 | legacy_metadata_directory 352 | ) 353 | items = api.CategorizedContent() 354 | for api_type, _ in items: 355 | legacy_metadata_directory_2 = user_metadata_directory.joinpath(api_type) 356 | directory_manager.user.legacy_metadata_directories.append( 357 | legacy_metadata_directory_2 358 | ) 359 | legacy_model_directory = directory_manager.root_download_directory.joinpath( 360 | site_name, performer_username 361 | ) 362 | directory_manager.user.legacy_download_directories.append( 363 | legacy_model_directory 364 | ) 365 | return directory_manager 366 | 367 | 368 | class DirectoryManager: 369 | def __init__( 370 | self, 371 | site_config: site_config_types, 372 | root_metadata_directory: Path, 
373 | root_download_directory: Path, 374 | ) -> None: 375 | self.root_directory = Path() 376 | self.root_metadata_directory = Path(root_metadata_directory) 377 | self.root_download_directory = Path(root_download_directory) 378 | self.user = self.UserDirectories() 379 | self.site_config = site_config 380 | formats = FormatTypes(site_config) 381 | string, status = formats.check_rules() 382 | if not status: 383 | print(string) 384 | exit(0) 385 | self.formats = formats 386 | pass 387 | 388 | def create_directories(self): 389 | # self.profile.create_directories() 390 | self.root_metadata_directory.mkdir(exist_ok=True) 391 | self.root_download_directory.mkdir(exist_ok=True) 392 | 393 | def delete_empty_directories( 394 | self, directory: Path, filesystem_manager: FilesystemManager 395 | ): 396 | for root, dirnames, _files in os.walk(directory, topdown=False): 397 | for dirname in dirnames: 398 | full_path = os.path.realpath(os.path.join(root, dirname)) 399 | contents = os.listdir(full_path) 400 | if not contents: 401 | shutil.rmtree(full_path, ignore_errors=True) 402 | else: 403 | content_paths = [Path(full_path, content) for content in contents] 404 | contents = filesystem_manager.remove_mandatory_files(content_paths) 405 | if not contents: 406 | shutil.rmtree(full_path, ignore_errors=True) 407 | 408 | if os.path.exists(directory) and not os.listdir(directory): 409 | os.rmdir(directory) 410 | 411 | # class ProfileDirectories: 412 | # def __init__(self, root_directory: Path) -> None: 413 | # self.root_directory = Path(root_directory) 414 | # self.metadata_directory = self.root_directory.joinpath("Metadata") 415 | # def create_directories(self): 416 | # self.root_directory.mkdir(exist_ok=True) 417 | 418 | class UserDirectories: 419 | def __init__(self) -> None: 420 | self.metadata_directory: Path | None = None 421 | self.download_directory: Path | None = None 422 | self.alt_download_directories: list[Path] = [] 423 | self.legacy_download_directories: list[Path] = [] 424 | self.legacy_metadata_directories: list[Path] = [] 425 | 426 | def find_legacy_directory( 427 | self, 428 | directory_type: Literal["metadata", "download"] = "metadata", 429 | api_type: str = "", 430 | ): 431 | match directory_type: 432 | case "metadata": 433 | directories = self.legacy_metadata_directories 434 | case _: 435 | directories = self.legacy_download_directories 436 | final_directory = directories[0] 437 | for directory in directories: 438 | for part in directory.parts: 439 | if api_type in part: 440 | return directory 441 | return final_directory 442 | 443 | async def walk(self, directory: Path): 444 | return await usc_helper.walk(directory) 445 | 446 | 447 | class FileManager: 448 | def __init__(self, directory_manager: DirectoryManager) -> None: 449 | self.files: list[Path] = [] 450 | self.directory_manager = directory_manager 451 | 452 | async def set_default_files( 453 | self, 454 | ): 455 | assert self.directory_manager.user.metadata_directory 456 | assert self.directory_manager.user.download_directory 457 | await self.update_files(self.directory_manager.user.metadata_directory) 458 | await self.update_files(self.directory_manager.user.download_directory) 459 | 460 | async def refresh_files(self): 461 | return await self.set_default_files() 462 | 463 | async def update_files(self, directory: Path): 464 | directory_manager = self.directory_manager 465 | files = await directory_manager.walk(directory) 466 | self.files.extend(files) 467 | return files 468 | 469 | def add_file(self, filepath: Path): 470 | 
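        # FileManager keeps an in-memory index (self.files) of every path found
        # while walking the user's metadata and download directories; add_file,
        # remove_file, rename_path and delete_path below keep that index in
        # sync with the corresponding filesystem operations.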
self.files.append(filepath) 471 | return True 472 | 473 | def remove_file(self, filepath: Path): 474 | if filepath in self.files: 475 | self.files.remove(filepath) 476 | return True 477 | return False 478 | 479 | def rename_path(self, old_filepath: Path, new_filepath: Path): 480 | self.remove_file(old_filepath) 481 | self.add_file(new_filepath) 482 | new_filepath.parent.mkdir(exist_ok=True, parents=True) 483 | shutil.move(old_filepath, new_filepath) 484 | return True 485 | 486 | def delete_path(self, filepath: Path): 487 | if filepath.is_dir(): 488 | filepath.rmdir() 489 | else: 490 | self.remove_file(filepath) 491 | filepath.unlink(missing_ok=True) 492 | return True 493 | 494 | async def cleanup(self): 495 | unique: set[Path] = set() 496 | await self.refresh_files() 497 | for valid_file in self.find_string_in_path("__drm__"): 498 | self.delete_path(valid_file) 499 | unique.add(valid_file.parent) 500 | for unique_file in unique: 501 | self.delete_path(unique_file) 502 | return True 503 | 504 | def find_string_in_path(self, string: str): 505 | valid_files: list[Path] = [] 506 | for file in self.files: 507 | if string in file.as_posix(): 508 | valid_files.append(file) 509 | return valid_files 510 | 511 | async def find_metadata_files(self, legacy_files: bool = True): 512 | new_list: list[Path] = [] 513 | for filepath in self.files: 514 | if not legacy_files: 515 | if "__legacy_metadata__" in filepath.parts: 516 | continue 517 | match filepath.suffix: 518 | case ".db": 519 | red_list = ["thumbs.db"] 520 | status = [x for x in red_list if x == filepath.name.lower()] 521 | if status: 522 | continue 523 | new_list.append(filepath) 524 | case ".json": 525 | new_list.append(filepath) 526 | case _: 527 | pass 528 | return new_list 529 | 530 | async def merge_alternative_directories(self, alt_directories: list[Path]): 531 | directory_manager = self.directory_manager 532 | assert directory_manager.user.download_directory 533 | for alt_download_directory in alt_directories: 534 | alt_files = await directory_manager.walk(alt_download_directory) 535 | for alt_file in alt_files: 536 | new_filepath = Path( 537 | alt_file.as_posix().replace( 538 | alt_download_directory.as_posix(), 539 | directory_manager.user.download_directory.as_posix(), 540 | ) 541 | ) 542 | 543 | if alt_file.suffix in [".json", ".db"]: 544 | if new_filepath.exists(): 545 | new_filepath = usc_helper.find_unused_filename(new_filepath) 546 | if new_filepath.exists(): 547 | breakpoint() 548 | if new_filepath.exists(): 549 | old_checksum = hashlib.md5(alt_file.read_bytes()).hexdigest() 550 | new_checksum = hashlib.md5(new_filepath.read_bytes()).hexdigest() 551 | if old_checksum == new_checksum: 552 | self.delete_path(alt_file) 553 | else: 554 | old_size = alt_file.stat().st_size 555 | new_size = new_filepath.stat().st_size 556 | if old_size > new_size: 557 | self.rename_path(alt_file, new_filepath) 558 | elif new_size > old_size: 559 | self.delete_path(alt_file) 560 | elif old_size == new_size: 561 | if usc_helper.is_image_valid(new_filepath): 562 | self.delete_path(alt_file) 563 | elif usc_helper.is_image_valid(alt_file): 564 | self.rename_path(alt_file, new_filepath) 565 | else: 566 | self.rename_path(alt_file, new_filepath) 567 | 568 | alt_files = await usc_helper.walk(alt_download_directory) 569 | if not alt_files: 570 | shutil.rmtree(alt_download_directory) 571 | 572 | 573 | class FormatTypes: 574 | def __init__(self, site_settings: site_config_types) -> None: 575 | self.metadata_directory_format = 
site_settings.metadata_setup.directory_format
576 |         self.file_directory_format = site_settings.download_setup.directory_format
577 |         self.filename_format = site_settings.download_setup.filename_format
578 | 
579 |     def check_rules(self):
580 |         """Checks the configured formats for invalid placeholders.
581 | 
582 |         Returns:
583 |             tuple[str, bool]: A message explaining any invalid format (empty if all are valid) and a validity flag.
584 |         """
585 |         bool_status = True
586 |         wl = []
587 |         invalid_list = []
588 |         string = ""
589 |         for key, _value in self:
590 |             if key == "file_directory_format":
591 |                 bl = FormatAttributes()
592 |                 wl = [v for _k, v in bl.__dict__.items()]
593 |                 bl = bl.whitelist(wl)
594 |                 invalid_list = []
595 |                 for b in bl:
596 |                     if b in self.file_directory_format.as_posix():
597 |                         invalid_list.append(b)
598 |             if key == "filename_format":
599 |                 bl = FormatAttributes()
600 |                 wl = [v for _k, v in bl.__dict__.items()]
601 |                 bl = bl.whitelist(wl)
602 |                 invalid_list = []
603 |                 for b in bl:
604 |                     if b in self.filename_format.as_posix():
605 |                         invalid_list.append(b)
606 |             if key == "metadata_directory_format":
607 |                 wl = [
608 |                     "{site_name}",
609 |                     "{first_letter}",
610 |                     "{model_id}",
611 |                     "{profile_username}",
612 |                     "{model_username}",
613 |                 ]
614 |                 bl = FormatAttributes().whitelist(wl)
615 |                 invalid_list: list[str] = []
616 |                 for b in bl:
617 |                     if b in self.metadata_directory_format.as_posix():
618 |                         invalid_list.append(b)
619 |             if invalid_list:
620 |                 string += f"You cannot use {','.join(invalid_list)} in {key}. Use any from this list {','.join(wl)}"
621 |                 bool_status = False
622 | 
623 |         return string, bool_status
624 | 
625 |     def check_unique(self):
626 |         values: list[str] = []
627 |         unique = []
628 |         new_format_copied = copy.deepcopy(self)
629 |         option: dict[str, Any] = {}
630 |         option["string"] = ""
631 |         option["bool_status"] = True
632 |         option["unique"] = new_format_copied
633 |         f = FormatAttributes()
634 |         for key, value in self:
635 |             value: Path
636 |             if key == "file_directory_format":
637 |                 unique = ["{media_id}", "{model_username}"]
638 |                 values = list(value.parts)
639 |                 option["unique"].file_directory_format = unique
640 |             elif key == "filename_format":
641 |                 values = []
642 |                 unique = ["{media_id}", "{filename}"]
643 |                 for _key2, value2 in f:
644 |                     if value2 in value.as_posix():
645 |                         values.append(value2)
646 |                 option["unique"].filename_format = unique
647 |             elif key == "metadata_directory_format":
648 |                 unique = ["{model_username}"]
649 |                 values = list(value.parts)
650 |                 option["unique"].metadata_directory_format = unique
651 |             if key != "filename_format":
652 |                 e = [x for x in values if x in unique]
653 |             else:
654 |                 e = [x for x in unique if x in values]
655 |             if e:
656 |                 setattr(option["unique"], key, e)
657 |             else:
658 |                 option[
659 |                     "string"
660 |                 ] += f"{key} is an invalid format since it has no unique identifiers. 
Use any from this list {','.join(unique)}\n" 661 | option["bool_status"] = False 662 | return option 663 | 664 | def __iter__(self): 665 | for attr, value in self.__dict__.items(): 666 | yield attr, value 667 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/metadata_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/metadata_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/option_manager.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from ultima_scraper_collection.config import auto_types 3 | 4 | 5 | class OptionManager: 6 | def __init__(self) -> None: 7 | self.performer_options: OptionsFormat | None = None 8 | self.subscription_options: OptionsFormat | None = None 9 | pass 10 | 11 | async def create_option( 12 | self, 13 | items: list[Any], 14 | category: str, 15 | auto_choice: auto_types = False, 16 | ): 17 | option = await OptionsFormat(items, category, auto_choice).formatter() 18 | return option 19 | 20 | 21 | class OptionsFormat: 22 | def __init__( 23 | self, 24 | items: list[Any], 25 | options_type: str, 26 | auto_choice: auto_types = False, 27 | ) -> None: 28 | self.items = items 29 | self.item_keys: list[str] = [] 30 | self.string = "" 31 | self.options_type = options_type 32 | self.auto_choice = auto_choice 33 | self.final_choices = [] 34 | 35 | async def formatter(self): 36 | options_type = self.options_type 37 | final_string = f"Choose {options_type.capitalize()}: 0 = All" 38 | auto_choice = self.auto_choice 39 | if type(auto_choice) == int: 40 | auto_choice = str(auto_choice) 41 | 42 | if isinstance(auto_choice, str): 43 | auto_choice = [x for x in auto_choice.split(",") if x] 44 | auto_choice = ( 45 | True if any(x in ["0", "all"] for x in auto_choice) else auto_choice 46 | ) 47 | 48 | if isinstance(auto_choice, list): 49 | auto_choice = [x for x in auto_choice if x] 50 | self.auto_choice = auto_choice 51 | 52 | match options_type: 53 | case "sites": 54 | self.item_keys = self.items 55 | my_string = " | ".join( 56 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 57 | ) 58 | final_string = f"{final_string} | {my_string}" 59 | self.string = final_string 60 | final_list = await self.choose_option() 61 | self.final_choices = [ 62 | key 63 | for choice in final_list 64 | for key in self.items 65 | if choice.lower() == key.lower() 66 | ] 67 | case "profiles": 68 | self.item_keys = [x.get_auth_details().username for x in self.items] 69 | my_string = " | ".join( 70 | map( 71 | lambda x: f"{self.items.index(x)+1} = {x.get_auth_details().username}", 72 | self.items, 73 | ) 74 | ) 75 | final_string = f"{final_string} | {my_string}" 76 | self.string = final_string 77 | final_list = await self.choose_option() 78 | self.final_choices = [ 79 | key 80 | for choice in final_list 81 | for key in self.items 82 | if choice.lower() == key.get_auth_details().username.lower() 83 | ] 84 | set1 = set(self.final_choices) 85 | set2 = set(self.items) 86 | difference = list(set2 - set1) 87 | for auth in difference: 88 | await auth.session_manager.active_session.close() 89 | case "subscriptions": 90 | subscription_users = [x for x in self.items] 91 | self.item_keys = 
[x.username for x in subscription_users] 92 | my_string = " | ".join( 93 | map( 94 | lambda x: f"{subscription_users.index(x)+1} = {x.username}", 95 | subscription_users, 96 | ) 97 | ) 98 | final_string = f"{final_string} | {my_string}" 99 | self.string = final_string 100 | final_list = await self.choose_option() 101 | self.final_choices = [ 102 | key 103 | for choice in final_list 104 | for key in subscription_users 105 | if choice.lower() == key.username.lower() 106 | ] 107 | 108 | case "contents": 109 | self.item_keys = self.items 110 | my_string = " | ".join( 111 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 112 | ) 113 | final_string = f"{final_string} | {my_string}" 114 | self.string = final_string 115 | final_list = await self.choose_option() 116 | self.final_choices = [ 117 | key 118 | for choice in final_list 119 | for key in self.items 120 | if choice.lower() == key.lower() 121 | ] 122 | case "medias": 123 | self.item_keys = self.items 124 | my_string = " | ".join( 125 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 126 | ) 127 | final_string = f"{final_string} | {my_string}" 128 | self.string = final_string 129 | final_list = await self.choose_option() 130 | self.final_choices = [ 131 | key 132 | for choice in final_list 133 | for key in self.items 134 | if choice.lower() == key.lower() 135 | ] 136 | case _: 137 | final_list = [] 138 | return self 139 | 140 | async def choose_option(self): 141 | def process_option(input_values: list[str]): 142 | input_list_2: list[str] = [] 143 | for input_value in input_values: 144 | if input_value.isdigit(): 145 | try: 146 | input_list_2.append(self.item_keys[int(input_value) - 1]) 147 | except IndexError: 148 | continue 149 | else: 150 | x = [x for x in self.item_keys if x.lower() == input_value.lower()] 151 | input_list_2.extend(x) 152 | return input_list_2 153 | 154 | input_list: list[str] = [x.lower() for x in self.item_keys] 155 | final_list: list[str] = [] 156 | if self.auto_choice: 157 | if not self.scrape_all(): 158 | if isinstance(self.auto_choice, list): 159 | input_values = [str(x).lower() for x in self.auto_choice] 160 | input_list = process_option(input_values) 161 | else: 162 | print(self.string) 163 | input_value = input().lower() 164 | if input_value != "0" and input_value != "all": 165 | input_values = input_value.split(",") 166 | input_list = process_option(input_values) 167 | final_list = input_list 168 | return final_list 169 | 170 | def scrape_all(self): 171 | status = False 172 | if ( 173 | self.auto_choice == True 174 | or isinstance(self.auto_choice, list) 175 | and isinstance(self.auto_choice[0], str) 176 | and ( 177 | self.auto_choice[0].lower() == "all" 178 | or self.auto_choice[0].lower() == "0" 179 | ) 180 | ): 181 | status = True 182 | return status 183 | 184 | def return_auto_choice(self): 185 | identifiers: list[int | str] | int | str | bool = [] 186 | if isinstance(self.auto_choice, list): 187 | identifiers = [x for x in self.auto_choice if not isinstance(x, bool)] 188 | return identifiers 189 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/server_manager.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from typing import Sequence 3 | 4 | import netifaces 5 | from sqlalchemy import select 6 | from ultima_scraper_db.databases.ultima_archive.database_api import ArchiveAPI 7 | from ultima_scraper_db.databases.ultima_archive.schemas.management import ( 8 | 
ServerModel, 9 | SiteModel, 10 | ) 11 | from ultima_scraper_db.managers.database_manager import Database, Schema 12 | 13 | 14 | class ServerManager: 15 | def __init__(self, ultima_archive_db_api: ArchiveAPI) -> None: 16 | self.ultima_archive_db_api = ultima_archive_db_api 17 | 18 | async def init(self, database: Database): 19 | def create_socket(socket_type: socket.SocketKind = socket.SOCK_DGRAM): 20 | temp_socket = socket.socket(socket.AF_INET, socket_type) 21 | temp_socket.connect(("8.8.8.8", 80)) # Connecting to Google's DNS server 22 | return temp_socket 23 | 24 | def get_local_ip(): 25 | # Create a temporary connection to a remote server to retrieve the local IP address 26 | temp_socket = create_socket() 27 | local_ip = temp_socket.getsockname()[0] 28 | temp_socket.close() 29 | return local_ip 30 | 31 | def mac_for_ip(ip: str) -> str | None: 32 | "Returns a list of MACs for interfaces that have given IP, returns None if not found" 33 | for i in netifaces.interfaces(): # type: ignore 34 | addrs = netifaces.ifaddresses(i) # type: ignore 35 | try: 36 | if_mac: str | None = addrs[netifaces.AF_LINK][0]["addr"] # type: ignore 37 | if_ip: str | None = addrs[netifaces.AF_INET][0]["addr"] # type: ignore 38 | except (IndexError, KeyError): # ignore ifaces that dont have MAC or IP 39 | if_mac = if_ip = None 40 | if if_ip == ip: 41 | return if_mac # type: ignore 42 | return None 43 | 44 | async with self.ultima_archive_db_api.create_management_api() as management_api: 45 | session = management_api.get_session() 46 | db_sites = await session.scalars(select(SiteModel)) 47 | db_sites = db_sites.all() 48 | self.reset = False 49 | if not db_sites: 50 | # Need to add a create or update for additional sites 51 | from ultima_scraper_db.databases.ultima_archive.schemas.management import ( 52 | default_sites, 53 | ) 54 | 55 | for site in default_sites: 56 | session.add(site) 57 | await session.commit() 58 | db_sites = await session.scalars(select(SiteModel)) 59 | db_sites = db_sites.all() 60 | self.reset = True 61 | private_ip = get_local_ip() 62 | mac_address = mac_for_ip(private_ip) 63 | # public_ip = requests.get("https://checkip.amazonaws.com/").text.strip() 64 | self.db_sites: Sequence[SiteModel] = db_sites 65 | self.ip_address = private_ip 66 | 67 | db_servers = await session.scalars(select(ServerModel)) 68 | db_servers = db_servers.all() 69 | if not db_servers: 70 | default_server = ServerModel( 71 | name="home", ip=self.ip_address, mac_address=mac_address 72 | ) 73 | session.add(default_server) 74 | await session.commit() 75 | active_server = await session.scalars( 76 | select(ServerModel).where( 77 | (ServerModel.ip == self.ip_address) 78 | & (ServerModel.mac_address == mac_address) 79 | ) 80 | ) 81 | self.active_server = active_server.one() 82 | self.site_schemas: dict[str, Schema] = {} 83 | for db_site in self.db_sites: 84 | site_schema_api = self.ultima_archive_db_api.get_site_api( 85 | db_site.db_name 86 | ) 87 | self.site_schemas[site_schema_api.schema.name] = site_schema_api.schema 88 | return self 89 | 90 | async def resolve_site_schema(self, value: str): 91 | return self.site_schemas[value] 92 | 93 | async def resolve_db_site(self, value: str): 94 | return [x for x in self.db_sites if x.db_name == value][0] 95 | 96 | async def find_site_api(self, name: str): 97 | return self.ultima_archive_db_api.site_apis[name] 98 | 99 | def get_server_id(self): 100 | return self.active_server.id 101 | -------------------------------------------------------------------------------- 
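A minimal usage sketch for the ServerManager above (illustrative only; how the
application obtains its ArchiveAPI and Database handles is assumed here via the
hypothetical helpers `bootstrap_archive_api()` and `bootstrap_database()`):

    import asyncio

    async def main() -> None:
        archive_api = await bootstrap_archive_api()  # hypothetical helper
        database = await bootstrap_database()        # hypothetical helper
        server_manager = await ServerManager(archive_api).init(database)
        # init() seeds the default sites on first run, registers this host by
        # private IP + MAC address, and caches one Schema per site, so:
        print(server_manager.get_server_id())

    asyncio.run(main())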
/ultima_scraper_collection/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/modules/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/modules/module_streamliner.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import copy 5 | from typing import Any 6 | 7 | import ultima_scraper_api 8 | from sqlalchemy import and_, or_, select 9 | from sqlalchemy.orm import joinedload 10 | from ultima_scraper_api.apis.onlyfans.classes.auth_model import OnlyFansAuthModel 11 | from ultima_scraper_api.apis.onlyfans.classes.user_model import ( 12 | create_user as OnlyFansUserModel, 13 | ) 14 | from ultima_scraper_api.helpers.main_helper import ProgressBar 15 | from ultima_scraper_collection.config import site_config_types 16 | from ultima_scraper_collection.managers.content_manager import ( 17 | ContentManager, 18 | MediaManager, 19 | ) 20 | from ultima_scraper_collection.managers.download_manager import DownloadManager 21 | from ultima_scraper_collection.managers.filesystem_manager import FilesystemManager 22 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 23 | MediaMetadata, 24 | MetadataManager, 25 | ) 26 | from ultima_scraper_collection.managers.server_manager import ServerManager 27 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 28 | FilePathModel as DBFilePathModel, 29 | ) 30 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 31 | MediaModel as DBMediaModel, 32 | ) 33 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 34 | MessageModel as DBMessageModel, 35 | ) 36 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import UserModel 37 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 38 | UserModel as DBUserModel, 39 | ) 40 | from ultima_scraper_renamer.reformat import ReformatManager 41 | 42 | auth_types = ultima_scraper_api.auth_types 43 | user_types = ultima_scraper_api.user_types 44 | message_types = ultima_scraper_api.message_types 45 | error_types = ultima_scraper_api.error_types 46 | subscription_types = ultima_scraper_api.subscription_types 47 | from typing import TYPE_CHECKING 48 | 49 | if TYPE_CHECKING: 50 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 51 | FanslyDataScraper, 52 | ) 53 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 54 | OnlyFansDataScraper, 55 | ) 56 | 57 | datascraper_types = OnlyFansDataScraper | FanslyDataScraper 58 | 59 | 60 | async def find_earliest_non_downloaded_message( 61 | user: user_types, datascraper: "datascraper_types" 62 | ): 63 | authed = user.get_authed() 64 | site_api = datascraper.server_manager.ultima_archive_db_api.get_site_api( 65 | authed.get_api().site_name 66 | ) 67 | earliest_non_downloaded_message = None 68 | found_media = False 69 | db_performer = datascraper.db_performers[user.id] 70 | db_content_manager = db_performer.content_manager 71 | assert db_content_manager, "Content manager not found" 72 | db_messages = db_content_manager.messages 73 | for db_message in db_messages: 74 | if (db_message.user_id == authed.id and 
db_message.receiver_id == user.id) or ( 75 | db_message.user_id == user.id and db_message.receiver_id == authed.id 76 | ): 77 | all_media_downloaded = False 78 | if db_message.media: 79 | found_media = True 80 | else: 81 | continue 82 | for media in db_message.media: 83 | for filepath in media.filepaths: 84 | if not filepath.message: 85 | continue 86 | if filepath.downloaded: 87 | all_media_downloaded = True 88 | break 89 | 90 | if not all_media_downloaded: 91 | if ( 92 | earliest_non_downloaded_message is None 93 | or db_message.created_at 94 | < earliest_non_downloaded_message.created_at 95 | ): 96 | earliest_non_downloaded_message = db_message 97 | if found_media: 98 | earliest_non_downloaded_message = db_messages[0] 99 | await site_api.schema.session.commit() 100 | return earliest_non_downloaded_message 101 | 102 | 103 | class StreamlinedDatascraper: 104 | def __init__( 105 | self, datascraper: datascraper_types, server_manager: ServerManager 106 | ) -> None: 107 | self.datascraper = datascraper 108 | self.filesystem_manager = FilesystemManager() 109 | self.media_types = self.datascraper.api.MediaTypes() 110 | self.user_list: set[user_types] = set() 111 | self.db_performers: dict[int, UserModel] = {} 112 | self.metadata_manager_users: dict[int, MetadataManager] = {} 113 | self.server_manager: ServerManager = server_manager 114 | self.content_managers: dict[int, ContentManager] = {} 115 | self.media_managers: dict[int, MediaManager] = {} 116 | 117 | def find_metadata_manager(self, user_id: int): 118 | return self.metadata_manager_users[user_id] 119 | 120 | def resolve_content_manager(self, user: user_types): 121 | content_manager = self.content_managers.get(user.id) 122 | authed = user.get_authed() 123 | if content_manager: 124 | if content_manager.authed.id != authed.id: 125 | content_manager = ContentManager(authed) 126 | else: 127 | content_manager = ContentManager(authed) 128 | self.content_managers[user.id] = content_manager 129 | return content_manager 130 | 131 | def create_media_manager(self, user: user_types): 132 | if user.id not in self.media_managers: 133 | self.media_managers[user.id] = MediaManager() 134 | return self.media_managers[user.id] 135 | 136 | def get_archive_db_api(self): 137 | return self.server_manager.ultima_archive_db_api 138 | 139 | async def configure_datascraper_jobs(self): 140 | api = self.datascraper.api 141 | site_config = self.datascraper.site_config 142 | available_jobs = site_config.jobs.scrape 143 | option_manager = self.datascraper.option_manager 144 | performer_options = option_manager.performer_options 145 | assert option_manager.subscription_options, "Subscription options not found" 146 | valid_user_list: set[user_types] = set( 147 | option_manager.subscription_options.final_choices 148 | ) 149 | scraping_subscriptions = site_config.jobs.scrape.subscriptions 150 | identifiers = [] 151 | if performer_options: 152 | identifiers = performer_options.return_auto_choice() 153 | if not available_jobs.subscriptions: 154 | for authed in api.auths.values(): 155 | authed.subscriptions = [] 156 | if available_jobs.messages: 157 | chat_users: list[user_types] = [] 158 | if identifiers: 159 | for authed in api.auths.values(): 160 | for identifier in identifiers: 161 | chat_user = await authed.get_user(identifier) 162 | if isinstance(chat_user, user_types): 163 | chat_users.append(chat_user) 164 | else: 165 | chat_users = await self.get_chat_users() 166 | [ 167 | user.scrape_whitelist.append("Messages") 168 | for user in chat_users 169 | if not 

        if available_jobs.paid_contents:
            for authed in self.datascraper.api.auths.values():
                paid_contents = await authed.get_paid_content()
                if not isinstance(paid_contents, error_types):
                    for paid_content in paid_contents:
                        author = paid_content.get_author()
                        if identifiers:
                            found = await author.match_identifiers(identifiers)
                            if not found:
                                continue
                        if author:
                            performer = authed.find_user(
                                identifier=author.id,
                            )
                            if performer:
                                performer.job_whitelist.append("PaidContents")
                                performer.scrape_whitelist.clear()
                                valid_user_list.add(performer)
        from ultima_scraper_api.apis.fansly.classes.user_model import (
            create_user as FYUserModel,
        )

        for user in valid_user_list:
            if isinstance(user, FYUserModel) and user.following:
                user.scrape_whitelist.clear()

        # Need to filter out own profile with is_performer, etc.
        final_valid_user_set = {
            user
            for user in valid_user_list
            if user.username not in user.get_authed().blacklist
        }

        self.user_list = final_valid_user_set
        return final_valid_user_set

    # Prepares the API links to be scraped
    async def scrape_vault(
        self, user: user_types, db_user: UserModel, content_type: str
    ):
        current_job = user.get_current_job()
        if not current_job:
            return
        authed: auth_types = user.get_authed()
        site_config = self.datascraper.site_config
        if (
            isinstance(authed, OnlyFansAuthModel)
            and user.is_authed_user()
            and user.is_performer()
        ):
            vault = await authed.get_vault_lists()
            vault_item = vault.resolve(name=content_type)
            assert vault_item, f"Vault item {content_type} not found"
            vault_item_medias = await vault_item.get_medias()
            media_metadatas: list[MediaMetadata] = []
            content_manager = self.resolve_content_manager(user)
            reformat_manager = ReformatManager(authed, self.filesystem_manager)
            for vault_item_media in vault_item_medias:
                media_metadata = MediaMetadata(
                    vault_item_media["id"],
                    vault_item_media["type"],
                    content_manager=content_manager,
                )
                media_metadata.raw_extractor(user, vault_item_media)
                reformat_item = reformat_manager.prepare_reformat(media_metadata)
                file_directory = reformat_item.reformat(
                    site_config.download_setup.directory_format
                )
                reformat_item.directory = file_directory
                file_path = reformat_item.reformat(
                    site_config.download_setup.filename_format
                )
                media_metadata.directory = file_directory
                media_metadata.filename = file_path.name
                media_metadatas.append(media_metadata)
        current_job.done = True

    async def prepare_filesystem(self, performer: user_types):
        await self.filesystem_manager.create_directory_manager(
            self.datascraper.site_config, performer
        )
        await self.filesystem_manager.format_directories(performer)
        metadata_manager = MetadataManager(
            performer,
            self.resolve_content_manager(performer),
            self.filesystem_manager,
        )
        await metadata_manager.process_legacy_metadata()
        self.metadata_manager_users[performer.id] = metadata_manager
        return metadata_manager

    async def paid_content_scraper(self, authed: auth_types):
        paid_contents = await authed.get_paid_content()
        datascraper = self.datascraper
        assert datascraper, "Datascraper not found"
        unique_suppliers: set[user_types] = set()
        for paid_content in paid_contents:
            supplier = paid_content.get_author()
            await self.prepare_filesystem(supplier)
            content_manager = datascraper.resolve_content_manager(supplier)
            content_type = paid_content.get_content_type()
            result = await datascraper.media_scraper(
                paid_content, supplier, content_type  # type:ignore
            )
            content_manager.set_content(
                content_type,
                result["content"],
            )
            unique_suppliers.add(supplier)

        for supplier in unique_suppliers:
            if isinstance(supplier, OnlyFansUserModel):
                content_manager = datascraper.resolve_content_manager(supplier)
                supplier.cache.messages.activate()
                contents = await supplier.get_mass_messages()
                supplier.cache.messages.deactivate()

                for content in contents:
                    content_type = content.get_content_type()
                    result = await datascraper.media_scraper(
                        content, supplier, content_type  # type:ignore
                    )
                    content_manager.set_content(
                        content_type,
                        result["content"],
                    )

    async def prepare_scraper(
        self,
        user: user_types,
        metadata_manager: MetadataManager,
        content_type: str,
        master_set: list[Any] | None = None,
    ):
        authed = user.get_authed()
        current_job = user.get_current_job()
        if not current_job:
            return
        # Copy so the caller's list is never mutated (and no mutable default).
        temp_master_set: list[Any] = copy.copy(master_set) if master_set else []
        if not temp_master_set and not current_job.ignore:
            match content_type:
                case "Stories":
                    temp_master_set.extend(
                        await self.datascraper.get_all_stories(user)
                    )
                case "Posts":
                    temp_master_set = await self.datascraper.get_all_posts(user)
                case "Messages":
                    db_message = await find_earliest_non_downloaded_message(
                        user, self.datascraper
                    )
                    cutoff_id = db_message.id if db_message else None
                    temp_master_set = await user.get_messages(cutoff_id=cutoff_id)
                case "Chats":
                    pass
                case "Highlights":
                    pass
                case "MassMessages":
                    if isinstance(authed, OnlyFansAuthModel):
                        if user.is_authed_user():
                            mass_message_stats = (
                                await authed.get_mass_message_stats()
                            )
                            temp_master_set = []
                            for mass_message_stat in mass_message_stats:
                                mass_message = (
                                    await mass_message_stat.get_mass_message()
                                )
                                temp_master_set.append(mass_message)
                        else:
                            db_message = await find_earliest_non_downloaded_message(
                                user, self.datascraper
                            )
                            cutoff_id = db_message.id if db_message else None
                            mass_messages = await user.get_mass_messages(
                                message_cutoff_id=cutoff_id
                            )
                            temp_master_set.extend(mass_messages)
                case _:
                    raise Exception(f"{content_type} is an invalid choice")
        # Add paid content, then remove duplicates by id.
        if isinstance(user, ultima_scraper_api.onlyfans_classes.user_model.create_user):
            for paid_content in await user.get_paid_contents(content_type):
                temp_master_set.append(paid_content)
        temp_master_set = list(
            {getattr(obj, "id"): obj for obj in temp_master_set}.values()
        )
        await self.process_scraped_content(
            temp_master_set, content_type, user, metadata_manager
        )
        current_job.done = True

    async def process_scraped_content(
        self,
        master_set: list[dict[str, Any]],
        api_type: str,
        subscription: user_types,
        metadata_manager: MetadataManager,
    ):
        if not master_set:
            return False
        tasks = [
            asyncio.create_task(
                self.datascraper.media_scraper(x, subscription, api_type)  # type:ignore
            )
            for x in master_set
        ]
        unrefined_set: list[dict[str, Any]] = await ProgressBar(
            f"Processing Scraped {api_type}"
        ).gather(tasks)
        final_content, _final_directories = (
            metadata_manager.merge_content_and_directories(unrefined_set)
        )
        if final_content:
            content_manager = self.resolve_content_manager(subscription)
            content_manager.set_content(api_type, final_content)
        else:
            print(f"No {api_type} found.")
        return True

    # Downloads scraped content
    async def prepare_downloads(
        self, performer: user_types, db_performer: DBUserModel, api_type: str
    ):
        site_db_api = self.server_manager.ultima_archive_db_api.find_site_api(
            self.datascraper.api.site_name
        )
        current_job = performer.get_current_job()
        global_settings = performer.get_api().get_global_settings()
        filesystem_manager = self.datascraper.filesystem_manager
        performer_directory_manager = filesystem_manager.get_directory_manager(
            performer.id
        )
        content_manager = self.resolve_content_manager(performer)
        db_medias = db_performer.content_manager.get_media_manager().medias
        final_download_set: set[MediaMetadata] = set()
        for db_media in db_medias.values():
            content_info = None
            if api_type == "Uncategorized":
                await db_media.awaitable_attrs.content_media_assos
                if db_media.content_media_assos:
                    continue
                if len(db_media.filepaths) > 1:
                    continue
            else:
                db_content = await db_media.find_content(api_type)
                if not db_content:
                    continue
                content_info = (db_content.id, api_type)
            db_filepath = db_media.find_filepath(content_info)
            if db_filepath:
                if api_type == "Uncategorized":
                    media_metadata = content_manager.media_manager.medias.get(
                        db_media.id
                    )
                else:
                    media_metadata = content_manager.find_media(
                        category=api_type, media_id=db_media.id
                    )
                if media_metadata and media_metadata.urls:
                    media_metadata.__db_media__ = db_media
                    final_download_set.add(media_metadata)
        total_media_count = len(final_download_set)
        download_media_count = len(
            [x for x in final_download_set if not x.get_filepath().exists()]
        )
        directory = performer_directory_manager.user.download_directory
        if final_download_set:
            string = "Processing Download:\n"
            string += (
                f"Name: {performer.username} | Type: {api_type} | "
                f"Downloading: {download_media_count} | Total: {total_media_count} | "
                f"Directory: {directory}\n"
            )
            print(string)
            download_manager = DownloadManager(
                performer.get_authed(),
                filesystem_manager,
                final_download_set,
                global_settings.tools.reformatter.active,
            )
            await download_manager.bulk_download()
        await site_db_api.schema.session.commit()
        if current_job:
            current_job.done = True

    async def manage_subscriptions(
        self,
        authed: auth_types,
        identifiers: list[int | str] | None = None,
        refresh: bool = True,
    ):
        temp_subscriptions: list[subscription_types] = []
        results = await self.datascraper.get_all_subscriptions(
            authed, identifiers or [], refresh
        )
        site_settings = authed.api.get_site_settings()
        if not site_settings:
            return temp_subscriptions
        # Sort the authed user's own profile to the front.
        results.sort(key=lambda x: x.user.is_me(), reverse=True)
        authed.subscriptions = results
        return authed.subscriptions

    async def account_setup(
        self,
        auth: auth_types,
        site_config: site_config_types,
        identifiers: list[int | str] | list[str] | None = None,
    ) -> tuple[bool, list[subscription_types]]:
        status = False
        subscriptions: list[subscription_types] = []

        if auth.is_authed() and site_config:
            authed = auth
            authed.blacklist = await authed.get_blacklist(site_config.blacklists)
            if identifiers or site_config.jobs.scrape.subscriptions:
                subscriptions.extend(
                    await self.manage_subscriptions(
                        authed, identifiers=identifiers  # type: ignore
                    )
                )
            status = True
        return status, subscriptions

    async def get_chat_users(self):
        chat_users: list[user_types] = []
        for authed in self.datascraper.api.auths.values():
            chats = await authed.get_chats()
            for chat in chats:
                username: str = chat["withUser"].username
                subscription = await authed.get_subscription(identifier=username)
                if not subscription:
                    subscription = chat["withUser"]
                chat_users.append(subscription)
        return chat_users

    async def get_performer(self, authed: auth_types, db_performer: DBUserModel):
        if authed.id == db_performer.id:
            performer = authed.user
        else:
            subscriptions = await authed.get_subscriptions(
                identifiers=[db_performer.id]
            )
            if not subscriptions:
                paid_contents = await authed.get_paid_content(
                    performer_id=db_performer.id
                )
                if not paid_contents:
                    return None
                performer = [
                    x.get_author()
                    for x in paid_contents
                    if x.get_author().id == db_performer.id
                ][0]
                temp_performer = await authed.get_user(performer.id, refresh=True)
                if not temp_performer:
                    performer.is_deleted = True
            else:
                performer = subscriptions[0].user
                if not performer.is_subscribed():
                    paid_contents = await authed.get_paid_content(
                        performer_id=db_performer.id
                    )
        if isinstance(
            performer, ultima_scraper_api.onlyfans_classes.user_model.create_user
        ):
            if performer.is_blocked:
                await performer.unblock()
        performer.add_aliases([x.username for x in db_performer.aliases])
        performer.username = performer.get_usernames()[0]
        return performer
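
A minimal usage sketch for the streamliner above (an editorial example, not repository code): it assumes `streamliner`, `performer`, and `db_performer` are already constructed by the surrounding datascraper and server managers, and that each content type has an active job, since `prepare_scraper` returns early when `get_current_job()` is None.

from ultima_scraper_collection.modules.module_streamliner import (
    StreamlinedDatascraper,
)


async def scrape_and_download(
    streamliner: StreamlinedDatascraper, performer, db_performer
):
    # Build directories and import any legacy metadata for this performer.
    metadata_manager = await streamliner.prepare_filesystem(performer)
    for content_type in ("Stories", "Posts", "Messages"):
        # Scrape the API for this content type, then download whatever the
        # archive database still marks as missing.
        await streamliner.prepare_scraper(performer, metadata_manager, content_type)
        await streamliner.prepare_downloads(performer, db_performer, content_type)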
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/projects/__init__.py
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/project_manager.py:
--------------------------------------------------------------------------------
import random
from typing import Any

import paramiko
from sqlalchemy import MetaData
from sshtunnel import SSHTunnelForwarder  # type: ignore
from ultima_scraper_db.managers.database_manager import Alembica, DatabaseManager


class Project:
    def __init__(
        self,
        name: str,
    ) -> None:
        self.name = name
        self.db_manager = DatabaseManager()

    def handle_ssh(self, db_info: dict[str, Any]):
        ssh_auth_info = db_info["ssh"]
        if ssh_auth_info["host"]:
            private_key_filepath = ssh_auth_info["private_key_filepath"]
            ssh_private_key_password = ssh_auth_info["private_key_password"]
            # SSHTunnelForwarder's ssh_pkey expects a paramiko.PKey, so pass
            # the loaded key object itself rather than its inner attribute.
            private_key = (
                paramiko.RSAKey.from_private_key_file(
                    private_key_filepath, ssh_private_key_password
                )
                if private_key_filepath
                else None
            )
            random_port = random.randint(6000, 6999)
            ssh_obj = SSHTunnelForwarder(
                (ssh_auth_info["host"], ssh_auth_info["port"]),
                ssh_username=ssh_auth_info["username"],
                ssh_pkey=private_key,
                ssh_private_key_password=ssh_private_key_password,
                remote_bind_address=(db_info["host"], db_info["port"]),
                local_bind_address=(db_info["host"], random_port),
            )
            db_info["ssh"] = ssh_obj
        else:
            db_info["ssh"] = None
        return db_info

    async def _init_db(
        self,
        db_info: dict[str, Any],
        alembica: Alembica,
        metadata: MetaData | None = None,
        echo: bool = False,
        upgrade: bool = False,
    ):
        # A fresh MetaData per call avoids sharing one mutable default
        # instance across databases.
        metadata = metadata if metadata is not None else MetaData()
        alembica.is_generate = upgrade
        alembica.is_migrate = upgrade
        db_info = self.handle_ssh(db_info)
        temp_database = self.db_manager.create_database(
            **db_info, metadata=metadata, alembica=alembica
        )
        self.db_manager.add_database(temp_database)
        await temp_database.init_db(echo)
        return temp_database
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/ultima_archive.py:
--------------------------------------------------------------------------------
from ultima_scraper_collection.config import UltimaScraperCollectionConfig
from ultima_scraper_collection.projects.project_manager import Project
from ultima_scraper_db.databases.ultima_archive import merged_metadata
from ultima_scraper_db.databases.ultima_archive.api.client import UAClient
from ultima_scraper_db.databases.ultima_archive.database_api import ArchiveAPI
from ultima_scraper_db.managers.database_manager import Alembica


class UltimaArchiveProject(Project):
    async def init(self, config: UltimaScraperCollectionConfig):
        # We could pass a database manager instead of config
        db_info = config.settings.databases[0].connection_info.dict()
        ultima_archive_db = await super()._init_db(
            db_info, Alembica(), merged_metadata
        )
        self.ultima_archive_db_api = await ArchiveAPI(ultima_archive_db).init()
        self.fast_api = UAClient(self.ultima_archive_db_api)
        UAClient.database_api = self.ultima_archive_db_api
        UAClient.config = config
        return self
--------------------------------------------------------------------------------
/ultima_scraper_collection/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/py.typed
--------------------------------------------------------------------------------
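
A hedged bootstrap sketch tying the two project classes together (an editorial example, not repository code): it assumes the default `UltimaScraperCollectionConfig` can be constructed as-is and that its first database entry points at a reachable server with no SSH tunnel configured.

import asyncio

from ultima_scraper_collection.config import UltimaScraperCollectionConfig
from ultima_scraper_collection.projects.ultima_archive import UltimaArchiveProject


async def main():
    # Real usage would load this config from a settings file.
    config = UltimaScraperCollectionConfig()
    project = UltimaArchiveProject("ultima_archive")
    await project.init(config)
    # project.ultima_archive_db_api and project.fast_api are now ready to use.


if __name__ == "__main__":
    asyncio.run(main())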