├── .gitignore ├── poetry.lock ├── pyproject.toml ├── test.py ├── tests └── __init__.py └── ultima_scraper_collection ├── __init__.py ├── config.py ├── helpers ├── __init__.py └── main_helper.py ├── managers ├── __init__.py ├── aio_pika_wrapper.py ├── content_manager.py ├── database_manager │ ├── __init__.py │ ├── connections │ │ ├── __init__.py │ │ └── sqlite │ │ │ ├── __init__.py │ │ │ ├── databases │ │ │ └── user_data │ │ │ │ └── migration │ │ │ │ ├── __init__.py │ │ │ │ ├── alembic.ini │ │ │ │ ├── alembic │ │ │ │ ├── README │ │ │ │ ├── env.py │ │ │ │ ├── script.py.mako │ │ │ │ └── versions │ │ │ │ │ ├── 0d4d92c0498e_content.py │ │ │ │ │ ├── 1454e4d1c6b8_content.py │ │ │ │ │ ├── 37c4f2719d65_content.py │ │ │ │ │ ├── 5493253cc03c_content.py │ │ │ │ │ ├── b791cf213df9_content.py │ │ │ │ │ └── d2f2002f3c36_content.py │ │ │ │ └── base_user_database.db │ │ │ ├── legacy_databases │ │ │ ├── __init__.py │ │ │ ├── messages │ │ │ │ ├── __init__.py │ │ │ │ └── migration │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── alembic.ini │ │ │ │ │ ├── alembic │ │ │ │ │ ├── env.py │ │ │ │ │ ├── script.py.mako │ │ │ │ │ └── versions │ │ │ │ │ │ ├── 2c36fcc0b921_content.py │ │ │ │ │ │ ├── 7c1c6e101059_content.py │ │ │ │ │ │ ├── aeb9fe314556_content.py │ │ │ │ │ │ ├── bf20242a238f_content.py │ │ │ │ │ │ └── d0118d8ec0b4_content.py │ │ │ │ │ ├── messages.py │ │ │ │ │ └── test_messages.db │ │ │ ├── posts │ │ │ │ └── migration │ │ │ │ │ ├── alembic.ini │ │ │ │ │ ├── alembic │ │ │ │ │ ├── env.py │ │ │ │ │ ├── script.py.mako │ │ │ │ │ └── versions │ │ │ │ │ │ ├── 194e05269f09_content.py │ │ │ │ │ │ ├── 5b4bea08c27f_content.py │ │ │ │ │ │ ├── 6b1b10eb67de_content.py │ │ │ │ │ │ ├── 990fc1108317_content.py │ │ │ │ │ │ └── a918b6b05d2f_content.py │ │ │ │ │ ├── posts.py │ │ │ │ │ └── test_posts.db │ │ │ └── stories │ │ │ │ └── migration │ │ │ │ ├── alembic.ini │ │ │ │ ├── alembic │ │ │ │ ├── env.py │ │ │ │ ├── script.py.mako │ │ │ │ └── versions │ │ │ │ │ ├── 29f675c35eee_content.py │ │ │ │ │ ├── 2e4f8364f7e2_content.py │ │ │ │ │ ├── 3076beb33c1b_content.py │ │ │ │ │ ├── e0c73f066547_content.py │ │ │ │ │ └── ebc3f4bb0782_content.py │ │ │ │ ├── stories.py │ │ │ │ └── test_stories.db │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── api_model.py │ │ │ ├── media_model.py │ │ │ └── user_database.py │ │ │ └── sqlite_database.py │ └── database_manager.py ├── datascraper_manager │ ├── __init__.py │ ├── datascraper_manager.py │ └── datascrapers │ │ ├── __init__.py │ │ ├── fansly.py │ │ └── onlyfans.py ├── download_manager.py ├── filesystem_manager.py ├── metadata_manager │ ├── __init__.py │ └── metadata_manager.py ├── option_manager.py └── server_manager.py ├── modules ├── __init__.py └── module_streamliner.py ├── projects ├── __init__.py ├── project_manager.py └── ultima_archive.py └── py.typed /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io/api/code,linux,python,pycharm,windows 2 | # Edit at https://www.gitignore.io/?templates=code,linux,python,pycharm,windows 3 | 4 | ### Code ### 5 | .vscode/* 6 | 7 | ### PyCharm Patch ### 8 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 9 | .idea/* 10 | 11 | ### Python ### 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | 
sdist/ 32 | var/ 33 | wheels/ 34 | pip-wheel-metadata/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | ### venv ### 42 | # Virtualenv 43 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 44 | .Python 45 | [Bb]in 46 | [Ii]nclude 47 | [Ll]ib 48 | [Ll]ib64 49 | [Ll]ocal 50 | [Ss]cripts 51 | pyvenv.cfg 52 | .env 53 | .venv 54 | env/ 55 | venv/ 56 | ENV/ 57 | env.bak/ 58 | venv.bak/ 59 | pip-selfcheck.json 60 | 61 | 62 | # PyInstaller 63 | # Usually these files are written by a python script from a template 64 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 65 | *.manifest 66 | *.spec 67 | 68 | # Installer logs 69 | pip-log.txt 70 | pip-delete-this-directory.txt 71 | 72 | # Unit test / coverage reports 73 | htmlcov/ 74 | .tox/ 75 | .nox/ 76 | .coverage 77 | .coverage.* 78 | .cache 79 | nosetests.xml 80 | coverage.xml 81 | *.cover 82 | .hypothesis/ 83 | .pytest_cache/ 84 | 85 | # Translations 86 | *.mo 87 | *.pot 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # pyenv 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | .dmypy.json 114 | dmypy.json 115 | 116 | ### Windows ### 117 | # Windows thumbnail cache files 118 | Thumbs.db 119 | Thumbs.db:encryptable 120 | ehthumbs.db 121 | ehthumbs_vista.db 122 | 123 | ### Mac ### 124 | .DS_Store 125 | 126 | # Dump file 127 | *.stackdump 128 | 129 | # Folder config file 130 | [Dd]esktop.ini 131 | 132 | # Recycle Bin used on file shares 133 | $RECYCLE.BIN/ 134 | 135 | # Windows Installer files 136 | *.cab 137 | *.msi 138 | *.msix 139 | *.msm 140 | *.msp 141 | 142 | # Windows shortcuts 143 | *.lnk 144 | 145 | # End of https://www.gitignore.io/api/code,linux,python,pycharm,windows 146 | 147 | # Project Specific 148 | typings 149 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "ultima-scraper-collection" 3 | version = "2.3.18" 4 | description = "" 5 | authors = ["UltimaHoarder <1285176+UltimaHoarder@users.noreply.github.com>"] 6 | packages = [{ include = "ultima_scraper_collection" }] 7 | include = ["ultima_scraper_collection/py.typed"] 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.10,<4" 11 | sqlalchemy = "^2.0.1" 12 | psycopg2 = "^2.9.5" 13 | alembic = "^1.9.2" 14 | ffmpeg-python = "^0.2.0" 15 | pydantic = "^2.0" 16 | netifaces = "^0.11.0" 17 | sshtunnel = "^0.4.0" 18 | inflection = "^0.5.1" 19 | alive-progress = "^3.1.5" 20 | aio-pika = "^9.4.1" 21 | ujson = "^5.10.0" 22 | 23 | ultima-scraper-api = "^2.0" 24 | ultima-scraper-renamer = "^1.0" 25 | ultima-scraper-db = "^0.3" 26 | ultima-scraper-detector = "^0.1" 27 | appdirs = "^1.4.4" 28 | 29 | [tool.poetry.group.dev.dependencies] 30 | python-semantic-release = "^7.33.2" 31 | black = { version = "^23.3.0", allow-prereleases = true } 32 | 33 | [tool.semantic_release] 34 | version_toml = "pyproject.toml:tool.poetry.version" 35 | 36 | 
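# version_toml points python-semantic-release at the Poetry version field in
# this file, so a release bump rewrites tool.poetry.version in place.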
[build-system] 37 | requires = ["poetry-core"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import ultima_scraper_api 4 | 5 | from ultima_scraper_collection.config import UltimaScraperCollectionConfig 6 | from ultima_scraper_collection.managers.datascraper_manager.datascraper_manager import ( 7 | DataScraperManager, 8 | ) 9 | from ultima_scraper_collection.managers.server_manager import ServerManager 10 | 11 | 12 | async def main(): 13 | config = UltimaScraperCollectionConfig() 14 | server_manager = ServerManager(config.settings.databases[0].connection_info.dict()) 15 | api = ultima_scraper_api.select_api("OnlyFans") 16 | _datascraper = DataScraperManager(server_manager, config).select_datascraper(api) 17 | pass 18 | 19 | 20 | asyncio.run(main()) 21 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/tests/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/__init__.py: -------------------------------------------------------------------------------- 1 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 2 | FanslyDataScraper, 3 | ) 4 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 5 | OnlyFansDataScraper, 6 | ) 7 | 8 | datascraper_types = OnlyFansDataScraper | FanslyDataScraper 9 | -------------------------------------------------------------------------------- /ultima_scraper_collection/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import ujson 4 | from appdirs import user_config_dir 5 | from pydantic import BaseModel, StrictBool, StrictInt, StrictStr 6 | from ultima_scraper_api.config import FanslyAPIConfig 7 | from ultima_scraper_api.config import GlobalAPI as USAGlobalAPI 8 | from ultima_scraper_api.config import OnlyFansAPIConfig, Settings 9 | from ultima_scraper_api.config import Sites as USASites 10 | from ultima_scraper_api.config import UltimaScraperAPIConfig 11 | 12 | 13 | class Jobs(BaseModel): 14 | class Scrape(BaseModel): 15 | subscriptions: bool = True 16 | messages: bool = True 17 | paid_contents: bool = True 18 | 19 | class Metadata: 20 | content: bool = True 21 | comments: bool = True 22 | 23 | scrape: Scrape = Scrape() 24 | metadata: Scrape = Scrape() 25 | 26 | 27 | class Directory(BaseModel): 28 | path: Path | None = None 29 | minimum_space: int = -1 30 | store: bool = True 31 | overflow: bool = True 32 | 33 | 34 | class GlobalXPathSetup(BaseModel): 35 | directories: list[Directory] = [Directory(path=Path("__user_data__").absolute())] 36 | directory_format: Path = Path() 37 | 38 | 39 | class DownloadPathSetup(GlobalXPathSetup): 40 | filename_format: Path = Path() 41 | text_length: int = 255 42 | date_format: str = "%Y-%m-%d" 43 | overwrite_files: bool = True 44 | 45 | 46 | class Trash(BaseModel): 47 | cleanup: bool = True 48 | 49 | 50 | class ToolSettings(BaseModel): 51 | active: bool = True 52 | 53 | 54 | class Renamer(ToolSettings): 55 | pass 56 | 57 | 58 | class Reformatter(ToolSettings): 59 | 
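    # Currently just a placeholder: Renamer, Reformatter and Downloader only
    # inherit the `active` flag from ToolSettings.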
pass 60 | 61 | 62 | class Downloader(ToolSettings): 63 | pass 64 | 65 | 66 | class SSHConnection(BaseModel): 67 | username: str | None = None 68 | private_key_filepath: Path | None = None 69 | private_key_password: str | None = None 70 | host: str | None = None 71 | port: int = 22 72 | 73 | 74 | class DatabaseInfo(BaseModel): 75 | name: str = "ultima_archive" 76 | host: str = "localhost" 77 | port: int = 5432 78 | username: str | None = None 79 | password: str | None = None 80 | ssh: SSHConnection = SSHConnection() 81 | 82 | 83 | class Database(BaseModel): 84 | connection_info: DatabaseInfo = DatabaseInfo() 85 | main: bool = True 86 | active: bool = True 87 | 88 | 89 | class Tools(BaseModel): 90 | renamer: Renamer = Renamer() 91 | reformatter: Reformatter = Reformatter() 92 | downloader: Downloader = Downloader() 93 | 94 | 95 | auto_types = list[int | str] | StrictInt | StrictStr | StrictBool | None 96 | 97 | 98 | class GlobalAPI(USAGlobalAPI): 99 | auto_profile_choice: auto_types = None 100 | auto_performer_choice: auto_types = None 101 | auto_content_choice: auto_types = None 102 | auto_media_choice: auto_types = None 103 | jobs: Jobs = Jobs() 104 | metadata_setup: GlobalXPathSetup = GlobalXPathSetup() 105 | metadata_setup.directory_format: Path = Path( 106 | "{site_name}/{first_letter}/{model_username}/Metadata" 107 | ) 108 | download_setup: DownloadPathSetup = DownloadPathSetup() 109 | download_setup.directory_format: Path = Path( 110 | "{site_name}/{first_letter}/{model_username}/{api_type}/{value}/{media_type}" 111 | ) 112 | download_setup.filename_format: Path = Path("{filename}.{ext}") 113 | blacklists: list[str] = [] 114 | 115 | 116 | class Sites(USASites): 117 | class OnlyFansAPIConfig(OnlyFansAPIConfig, GlobalAPI): 118 | pass 119 | 120 | class FanslyAPIConfig(FanslyAPIConfig, GlobalAPI): 121 | pass 122 | 123 | onlyfans: OnlyFansAPIConfig = OnlyFansAPIConfig(auto_content_choice=True) 124 | fansly: FanslyAPIConfig = FanslyAPIConfig() 125 | 126 | 127 | site_config_types = Sites.OnlyFansAPIConfig | Sites.FanslyAPIConfig 128 | 129 | 130 | class UltimaScraperCollectionConfig(UltimaScraperAPIConfig): 131 | class Settings(Settings): 132 | auto_site_choice: str = "" 133 | databases: list[Database] = [Database()] 134 | tools: Tools = Tools() 135 | trash: Trash = Trash() 136 | infinite_loop: bool = False 137 | exit_on_completion: bool = True 138 | 139 | def get_main_database(self): 140 | return [x for x in self.databases if x.main][0] 141 | 142 | settings: Settings = Settings() 143 | site_apis: Sites = Sites() 144 | 145 | def load_default_config(self): 146 | config_dir = user_config_dir("ultima_scraper_verse") # type: ignore 147 | config_path = Path(config_dir) / "config.json" # type: ignore 148 | 149 | config_json = ujson.loads(config_path.read_text()) 150 | return UltimaScraperCollectionConfig(**config_json) 151 | 152 | def get_site_config(self, site_name: str): 153 | return getattr(self.site_apis, site_name.lower()) 154 | -------------------------------------------------------------------------------- /ultima_scraper_collection/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/helpers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/helpers/main_helper.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from shutil import disk_usage 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from ultima_scraper_api import user_types 7 | 8 | from ultima_scraper_collection.config import Directory, UltimaScraperCollectionConfig 9 | import ujson 10 | 11 | if TYPE_CHECKING: 12 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 13 | UserModel as DBUserModel, 14 | ) 15 | 16 | 17 | def check_space( 18 | custom_directory: list[Directory], 19 | ): 20 | root = "" 21 | while not root: 22 | paths: list[dict[str, Any]] = [] 23 | for directory in custom_directory: 24 | # ISSUE 25 | # Could cause problems w/ relative/symbolic links that point to another hard drive 26 | # Haven't tested if it calculates hard A or relative/symbolic B's total space. 27 | # size is in GB 28 | assert directory.path 29 | obj_Disk = disk_usage(str(directory.path.parent)) 30 | free = obj_Disk.free / (1024.0**3) 31 | x = {} 32 | x["path"] = directory.path 33 | x["free"] = free 34 | x["min_space"] = directory.minimum_space 35 | paths.append(x) 36 | for item in paths: 37 | download_path = item["path"] 38 | free = item["free"] 39 | if free > item["min_space"]: 40 | root = download_path 41 | break 42 | return root 43 | 44 | 45 | from ultima_scraper_api.apis.onlyfans.classes.user_model import ( 46 | create_user as OFUserModel, 47 | ) 48 | 49 | 50 | async def is_valuable(user: "DBUserModel | user_types"): 51 | """ 52 | Checks if the user is valuable based on their subscription status or if they have supplied content to a buyer. 53 | 54 | Args: 55 | user (DBUserModel | user_types): The user to check. 56 | 57 | Returns: 58 | bool: True if the user is valuable, False otherwise. 
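
    Example (hypothetical caller; `scrape_performer` is illustrative only):
        >>> if await is_valuable(performer):
        ...     await scrape_performer(performer)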
59 | """ 60 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 61 | UserModel as DBUserModel, 62 | ) 63 | 64 | if isinstance(user, DBUserModel): 65 | if await user.find_buyers(active=True): 66 | return True 67 | else: 68 | return False 69 | else: 70 | if user.is_performer(): 71 | if isinstance(user, OFUserModel): 72 | if ( 73 | user.subscribed_is_expired_now == False 74 | or await user.get_paid_contents() 75 | ): 76 | return True 77 | else: 78 | return False 79 | else: 80 | # We need to add paid_content checker 81 | if user.following: 82 | return True 83 | else: 84 | return False 85 | else: 86 | return False 87 | 88 | 89 | async def is_notif_valuable(api_user: user_types): 90 | if await is_valuable(api_user): 91 | if await api_user.subscription_price() == 0: 92 | if isinstance(api_user, OFUserModel) and await api_user.get_paid_contents(): 93 | return True 94 | return False 95 | else: 96 | return True 97 | return False 98 | 99 | 100 | async def walk(directory: Path): 101 | all_files: list[Path] = [] 102 | for root, _subdirs, files in os.walk(directory): 103 | x = [Path(root, x) for x in files] 104 | all_files.extend(x) 105 | return all_files 106 | 107 | 108 | def find_unused_filename(filepath: Path): 109 | base_name = filepath.stem # Get the filename without extension 110 | extension = filepath.suffix # Get the file extension 111 | counter = 2 112 | 113 | while filepath.exists(): 114 | new_name = f"{base_name} ({counter}){extension}" 115 | filepath = filepath.with_name(new_name) 116 | counter += 1 117 | 118 | return filepath 119 | 120 | 121 | from PIL import Image 122 | 123 | 124 | def is_image_valid(file_path: Path): 125 | try: 126 | with Image.open(file_path) as img: 127 | # Attempt to open the image file 128 | img.load() # This will load the image data 129 | return True # If successful, the image is not corrupted 130 | except Exception as e: 131 | # An exception occurred, so the image might be corrupted 132 | print(f"Error: {e}") 133 | return False 134 | 135 | 136 | def load_config(config_path: Path): 137 | config_json = ujson.loads(config_path.read_text()) 138 | return UltimaScraperCollectionConfig(**config_json) 139 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/aio_pika_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import aio_pika 4 | import ujson 5 | 6 | 7 | def create_notification( 8 | category: str, 9 | site_name: str, 10 | item: Any, 11 | ): 12 | json_message = { 13 | "site_name": site_name, 14 | "category": category, 15 | "performer_id": item.id, 16 | "username": item.username, 17 | } 18 | message = {"id": item.id, "data": json_message} 19 | return message 20 | 21 | 22 | def create_message( 23 | site_name: str, item: Any, mandatory_jobs: dict[str, dict[str, list[str]]] 24 | ): 25 | json_message = { 26 | "site_name": site_name, 27 | "performer_id": item.id, 28 | "username": item.username, 29 | "mandatory_jobs": mandatory_jobs, 30 | } 31 | message = {"id": item.id, "data": json_message} 32 | return message 33 | 34 | 35 | class 
AioPikaWrapper: 36 | def __init__(self, host: str = "localhost"): 37 | self.amqp_url = f"amqp://{host}/" 38 | self.connection = None 39 | self.channel = None 40 | 41 | def get_connection(self): 42 | assert self.connection is not None 43 | return self.connection 44 | 45 | def get_channel(self): 46 | assert self.channel is not None 47 | return self.channel 48 | 49 | async def connect(self, prefetch_count: int = 0): 50 | if self.connection is not None: 51 | return 52 | self.connection = await aio_pika.connect_robust(self.amqp_url) 53 | self.channel = await self.connection.channel() 54 | await self.channel.set_qos(prefetch_count=prefetch_count) 55 | 56 | async def declare_queue(self, queue_name: str, durable: bool = True): 57 | if self.channel is None: 58 | await self.connect() 59 | assert self.channel is not None 60 | return await self.channel.declare_queue( 61 | queue_name, 62 | durable=durable, 63 | arguments={"x-message-deduplication": True, "x-max-priority": 10}, 64 | ) 65 | 66 | async def publish_message( 67 | self, 68 | queue_name: str, 69 | message: dict[str, Any], 70 | durable: bool = True, 71 | priority: int = 0, 72 | ): 73 | if self.channel is None: 74 | await self.connect() 75 | await self.declare_queue(queue_name, durable) 76 | assert self.channel is not None 77 | 78 | message_id = message.get( 79 | "id", "default_id" 80 | ) # Ensure you have a unique ID for deduplication 81 | headers = {"x-deduplication-header": message_id} 82 | try: 83 | await self.channel.default_exchange.publish( 84 | aio_pika.Message( 85 | body=ujson.dumps(message).encode(), 86 | delivery_mode=( 87 | aio_pika.DeliveryMode.PERSISTENT 88 | if durable 89 | else aio_pika.DeliveryMode.NOT_PERSISTENT 90 | ), 91 | headers=headers, 92 | priority=priority, 93 | ), 94 | routing_key=queue_name, 95 | ) 96 | print(f"Message published to {queue_name}") 97 | return True 98 | except aio_pika.exceptions.DeliveryError as e: 99 | print(f"Error publishing message: {e}") 100 | return False 101 | 102 | async def publish_notification(self, message: dict[str, Any]): 103 | await self.publish_message("telegram_notifications", message) 104 | await self.publish_message("discord_notifications", message) 105 | 106 | async def close(self): 107 | if self.connection: 108 | await self.connection.close() 109 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/content_manager.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | from typing import TYPE_CHECKING, Any 3 | 4 | if TYPE_CHECKING: 5 | from ultima_scraper_api import auth_types 6 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 7 | MediaMetadata, 8 | ) 9 | 10 | 11 | class DefaultCategorizedContent: 12 | def __init__(self) -> None: 13 | self.MassMessages: dict[int, dict[str, Any]] = {} 14 | self.Stories: dict[int, dict[str, Any]] = {} 15 | self.Posts: dict[int, dict[str, Any]] = {} 16 | self.Chats: dict[int, dict[str, Any]] = {} 17 | self.Messages: dict[int, dict[str, Any]] = {} 18 | self.Highlights: dict[int, dict[str, Any]] = {} 19 | 20 | def __iter__(self): 21 | for attr, value in self.__dict__.items(): 22 | yield attr, value 23 | 24 | def find_content(self, content_id: int, content_type: str): 25 | return getattr(self, content_type)[content_id] 26 | 27 | 28 | class ContentManager: 29 | def __init__(self, authed: "auth_types") -> None: 30 | self.authed = authed 31 | self.auth_session = authed.auth_session 32 | 
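        # Per-account state: scraped items are bucketed by category in
        # `categorized`, and media metadata is tracked by one MediaManager.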
self.categorized = DefaultCategorizedContent() 33 | self.media_manager: MediaManager = MediaManager() 34 | 35 | def get_contents(self, content_type: str): 36 | return getattr(self.categorized, content_type) 37 | 38 | def set_content(self, content_type: str, scraped: list[Any]): 39 | for content in scraped: 40 | content_item = getattr(self.categorized, content_type) 41 | content_item[content.content_id] = content 42 | 43 | def find_content(self, content_id: int, content_type: str): 44 | found_content = None 45 | try: 46 | found_content = getattr(self.categorized, content_type)[content_id] 47 | except KeyError: 48 | pass 49 | return found_content 50 | 51 | def find_media(self, category: str, media_id: int): 52 | content_items = getattr(self.categorized, category) 53 | for content in content_items.values(): 54 | for media in content.medias: 55 | if media.id == media_id: 56 | return media 57 | 58 | def get_all_media_ids(self): 59 | return list(chain(*[x for x in self.categorized.__dict__.values()])) 60 | 61 | 62 | class MediaManager: 63 | def __init__(self) -> None: 64 | self.medias: dict[int, "MediaMetadata"] = {} 65 | self.invalid_medias: list["MediaMetadata"] = [] 66 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # sys.path path, will be prepended to sys.path if present. 
11 | # defaults to the current working directory. 12 | prepend_sys_path = . 13 | 14 | # timezone to use when rendering the date 15 | # within the migration file as well as the filename. 16 | # string value is passed to dateutil.tz.gettz() 17 | # leave blank for localtime 18 | # timezone = 19 | 20 | # max length of characters to apply to the 21 | # "slug" field 22 | # truncate_slug_length = 40 23 | 24 | # set to 'true' to run the environment during 25 | # the 'revision' command, regardless of autogenerate 26 | # revision_environment = false 27 | 28 | # set to 'true' to allow .pyc and .pyo files without 29 | # a source .py file to be detected as revisions in the 30 | # versions/ directory 31 | # sourceless = false 32 | 33 | # version location specification; this defaults 34 | # to alembic/versions. When using multiple version 35 | # directories, initial revisions must be specified with --version-path 36 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 37 | 38 | # the output encoding used when revision files 39 | # are written from script.py.mako 40 | # output_encoding = utf-8 41 | 42 | sqlalchemy.url = driver://user:pass@localhost/dbname 43 | 44 | 45 | [post_write_hooks] 46 | # post_write_hooks defines scripts or Python functions that are run 47 | # on newly generated revision scripts. See the documentation for further 48 | # detail and examples 49 | 50 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 51 | # hooks = black 52 | # black.type = console_scripts 53 | # black.entrypoint = black 54 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 55 | 56 | # Logging configuration 57 | [loggers] 58 | keys = root,sqlalchemy,alembic 59 | 60 | [handlers] 61 | keys = console 62 | 63 | [formatters] 64 | keys = generic 65 | 66 | [logger_root] 67 | level = WARN 68 | handlers = console 69 | qualname = 70 | 71 | [logger_sqlalchemy] 72 | level = WARN 73 | handlers = 74 | qualname = sqlalchemy.engine 75 | 76 | [logger_alembic] 77 | level = WARN 78 | handlers = 79 | qualname = alembic 80 | 81 | [handler_console] 82 | class = StreamHandler 83 | args = (sys.stderr,) 84 | level = NOTSET 85 | formatter = generic 86 | 87 | [formatter_generic] 88 | format = %(levelname)-5.5s [%(name)s] %(message)s 89 | datefmt = %H:%M:%S 90 | 91 | [custom] 92 | database_name = None 93 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 
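# fileConfig() applies the [loggers]/[handlers]/[formatters] sections of the
# alembic.ini that this migration run was invoked with.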
13 | fileConfig(config.config_file_name) 14 | 15 | # add your model's MetaData object here 16 | # for 'autogenerate' support 17 | # from myapp import mymodel 18 | # target_metadata = mymodel.Base.metadata 19 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models import ( 20 | user_database, 21 | ) 22 | 23 | target_metadata = user_database.Base.metadata 24 | pass 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure(connection=connection, target_metadata=target_metadata) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/0d4d92c0498e_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 0d4d92c0498e 6 | Revises: b791cf213df9 7 | Create Date: 2022-01-14 20:15:27.019051 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | 13 | # revision identifiers, used by Alembic. 
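# `down_revision` links this script to its parent revision, which is how
# Alembic orders the migration chain when upgrading or downgrading.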
14 | revision = "0d4d92c0498e" 15 | down_revision = "b791cf213df9" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.add_column("products", sa.Column("title", sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! ### 28 | op.drop_column("products", "title") 29 | # ### end Alembic commands ### 30 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/1454e4d1c6b8_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 1454e4d1c6b8 6 | Revises: 37c4f2719d65 7 | Create Date: 2023-02-12 23:17:29.239758 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | from sqlalchemy import Column, Integer 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "1454e4d1c6b8" 16 | down_revision = "37c4f2719d65" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | conn = op.get_bind() 24 | 25 | meta = sa.MetaData() 26 | meta.reflect(bind=conn, only=("medias",)) 27 | old_table = meta.tables["medias"] 28 | if "media_id_2" not in old_table.columns: 29 | with op.batch_alter_table("medias", recreate="always") as batch_op: 30 | batch_op.add_column(Column("media_id_2", Integer), insert_after="id") 31 | 32 | pass 33 | # ### end Alembic commands ### 34 | 35 | 36 | def downgrade(): 37 | # ### commands auto generated by Alembic - please adjust! ### 38 | pass 39 | # ### end Alembic commands ### 40 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/37c4f2719d65_content.py: -------------------------------------------------------------------------------- 1 | """content 2 | 3 | Revision ID: 37c4f2719d65 4 | Revises: 0d4d92c0498e 5 | Create Date: 2022-03-10 16:00:20.390009 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "37c4f2719d65" 14 | down_revision = "0d4d92c0498e" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table( 22 | "profiles", 23 | sa.Column("id", sa.Integer(), nullable=False), 24 | sa.Column("user_id", sa.Integer(), nullable=False), 25 | sa.Column("username", sa.String(), nullable=False), 26 | sa.PrimaryKeyConstraint("id"), 27 | sa.UniqueConstraint("username"), 28 | ) 29 | # ### end Alembic commands ### 30 | 31 | 32 | def downgrade(): 33 | # ### commands auto generated by Alembic - please adjust! 
### 34 | op.drop_table("profiles") 35 | # ### end Alembic commands ### 36 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/5493253cc03c_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 5493253cc03c 5 | Revises: 6 | Create Date: 2021-06-21 14:22:30.585216 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "5493253cc03c" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("api_type", sa.String(), nullable=True), 32 | sa.Column("media_type", sa.String(), nullable=True), 33 | sa.Column("preview", sa.Integer(), nullable=True), 34 | sa.Column("linked", sa.String(), nullable=True), 35 | sa.Column("downloaded", sa.Integer(), nullable=True), 36 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 37 | sa.PrimaryKeyConstraint("id"), 38 | sa.UniqueConstraint("media_id"), 39 | ) 40 | op.create_table( 41 | "messages", 42 | sa.Column("id", sa.Integer(), nullable=False), 43 | sa.Column("post_id", sa.Integer(), nullable=False), 44 | sa.Column("text", sa.String(), nullable=True), 45 | sa.Column("price", sa.Integer(), nullable=True), 46 | sa.Column("paid", sa.Integer(), nullable=True), 47 | sa.Column("archived", sa.Boolean(), nullable=True), 48 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 49 | sa.Column("user_id", sa.Integer(), nullable=True), 50 | sa.PrimaryKeyConstraint("id"), 51 | sa.UniqueConstraint("post_id"), 52 | ) 53 | op.create_table( 54 | "posts", 55 | sa.Column("id", sa.Integer(), nullable=False), 56 | sa.Column("post_id", sa.Integer(), nullable=False), 57 | sa.Column("text", sa.String(), nullable=True), 58 | sa.Column("price", sa.Integer(), nullable=True), 59 | sa.Column("paid", sa.Integer(), nullable=True), 60 | sa.Column("archived", sa.Boolean(), nullable=True), 61 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 62 | sa.PrimaryKeyConstraint("id"), 63 | sa.UniqueConstraint("post_id"), 64 | ) 65 | op.create_table( 66 | "stories", 67 | sa.Column("id", sa.Integer(), nullable=False), 68 | sa.Column("post_id", sa.Integer(), nullable=False), 69 | sa.Column("text", sa.String(), nullable=True), 70 | sa.Column("price", sa.Integer(), nullable=True), 71 | sa.Column("paid", sa.Integer(), nullable=True), 72 | sa.Column("archived", sa.Boolean(), nullable=True), 73 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 74 | sa.PrimaryKeyConstraint("id"), 75 | sa.UniqueConstraint("post_id"), 76 | ) 77 | # ### end Alembic commands ### 78 | 79 | 80 | def downgrade(): 81 | # ### commands auto generated by Alembic - please adjust! 
### 82 | op.drop_table("stories") 83 | op.drop_table("posts") 84 | op.drop_table("messages") 85 | op.drop_table("medias") 86 | # ### end Alembic commands ### 87 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/b791cf213df9_content.py: -------------------------------------------------------------------------------- 1 | """content 2 | 3 | Revision ID: b791cf213df9 4 | Revises: 5493253cc03c 5 | Create Date: 2021-11-16 16:33:04.723478 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "b791cf213df9" 14 | down_revision = "5493253cc03c" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table( 22 | "others", 23 | sa.Column("id", sa.Integer(), nullable=False), 24 | sa.Column("post_id", sa.Integer(), nullable=False), 25 | sa.Column("text", sa.String(), nullable=True), 26 | sa.Column("price", sa.Integer(), nullable=True), 27 | sa.Column("paid", sa.Integer(), nullable=True), 28 | sa.Column("archived", sa.Boolean(), nullable=True), 29 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 30 | sa.PrimaryKeyConstraint("id"), 31 | sa.UniqueConstraint("post_id"), 32 | ) 33 | op.create_table( 34 | "products", 35 | sa.Column("id", sa.Integer(), nullable=False), 36 | sa.Column("post_id", sa.Integer(), nullable=False), 37 | sa.Column("text", sa.String(), nullable=True), 38 | sa.Column("price", sa.Integer(), nullable=True), 39 | sa.Column("paid", sa.Integer(), nullable=True), 40 | sa.Column("archived", sa.Boolean(), nullable=True), 41 | sa.Column("created_at", sa.TIMESTAMP(), nullable=True), 42 | sa.PrimaryKeyConstraint("id"), 43 | sa.UniqueConstraint("post_id"), 44 | ) 45 | # ### end Alembic commands ### 46 | 47 | 48 | def downgrade(): 49 | # ### commands auto generated by Alembic - please adjust! ### 50 | op.drop_table("products") 51 | op.drop_table("others") 52 | # ### end Alembic commands ### 53 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/alembic/versions/d2f2002f3c36_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: d2f2002f3c36 6 | Revises: 1454e4d1c6b8 7 | Create Date: 2023-02-13 22:40:57.202281 8 | 9 | """ 10 | import sqlalchemy as sa 11 | from alembic import op 12 | from sqlalchemy.orm import Session 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "d2f2002f3c36" 16 | down_revision = "1454e4d1c6b8" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! 
### 23 | invalid_conn = op.get_bind() 24 | database_url = str(invalid_conn.engine.url) 25 | conn = sa.create_engine(database_url) 26 | session = Session(bind=conn) 27 | res = session.execute(sa.text("SELECT id,media_id FROM medias;")) 28 | results = res.fetchall() 29 | meta = sa.MetaData() 30 | meta.reflect(bind=conn, only=("medias",)) 31 | old_table = meta.tables["medias"] 32 | 33 | session = Session(bind=conn) 34 | for items in results: 35 | formatted = dict(items._mapping) 36 | ( 37 | session.query(old_table) 38 | .filter(old_table.c.id == formatted["id"]) 39 | .update({"media_id_2": formatted["media_id"]}) 40 | ) 41 | session.commit() 42 | with op.batch_alter_table("medias") as batch_op: 43 | batch_op.drop_column("media_id") 44 | batch_op.alter_column("media_id_2", new_column_name="media_id") 45 | # ### end Alembic commands ### 46 | 47 | 48 | def downgrade(): 49 | # ### commands auto generated by Alembic - please adjust! ### 50 | pass 51 | # ### end Alembic commands ### 52 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/base_user_database.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/databases/user_data/migration/base_user_database.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 
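# Each legacy database (messages, posts, stories) keeps its own copy of this
# ini next to its alembic/ tree.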
2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.messages.migration import ( 7 | messages, 8 | ) 9 | 10 | # this is the Alembic Config object, which provides 11 | # access to the values within the .ini file in use. 12 | config = context.config 13 | 14 | # Interpret the config file for Python logging. 15 | # This line sets up loggers basically. 
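# disable_existing_loggers=False keeps loggers configured before this call
# (e.g. the application's own) from being silenced.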
16 | fileConfig(config.config_file_name, disable_existing_loggers=False) 17 | 18 | # add your model's MetaData object here 19 | # for 'autogenerate' support 20 | # from myapp import mymodel 21 | # target_metadata = mymodel.Base.metadata 22 | target_metadata = messages.Base.metadata 23 | 24 | # other values from the config, defined by the needs of env.py, 25 | # can be acquired: 26 | # my_important_option = config.get_main_option("my_important_option") 27 | # ... etc. 28 | 29 | 30 | def run_migrations_offline(): 31 | """Run migrations in 'offline' mode. 32 | 33 | This configures the context with just a URL 34 | and not an Engine, though an Engine is acceptable 35 | here as well. By skipping the Engine creation 36 | we don't even need a DBAPI to be available. 37 | 38 | Calls to context.execute() here emit the given string to the 39 | script output. 40 | 41 | """ 42 | url = config.get_main_option("sqlalchemy.url") 43 | context.configure( 44 | url=url, 45 | target_metadata=target_metadata, 46 | literal_binds=True, 47 | dialect_opts={"paramstyle": "named"}, 48 | ) 49 | 50 | with context.begin_transaction(): 51 | context.run_migrations() 52 | 53 | 54 | def run_migrations_online(): 55 | """Run migrations in 'online' mode. 56 | 57 | In this scenario we need to create an Engine 58 | and associate a connection with the context. 59 | 60 | """ 61 | connectable = engine_from_config( 62 | config.get_section(config.config_ini_section), 63 | prefix="sqlalchemy.", 64 | poolclass=pool.NullPool, 65 | ) 66 | 67 | with connectable.connect() as connection: 68 | context.configure( 69 | connection=connection, 70 | target_metadata=target_metadata, 71 | render_as_batch=True, 72 | compare_type=True, 73 | ) 74 | 75 | with context.begin_transaction(): 76 | context.run_migrations() 77 | 78 | 79 | if context.is_offline_mode(): 80 | run_migrations_offline() 81 | else: 82 | run_migrations_online() 83 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/2c36fcc0b921_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | 3 | """content 4 | 5 | Revision ID: 2c36fcc0b921 6 | Revises: 7 | Create Date: 2021-01-08 20:25:52.456387 8 | 9 | """ 10 | from alembic import op 11 | import sqlalchemy as sa 12 | 13 | 14 | # revision identifiers, used by Alembic. 
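# down_revision is None because this is the root revision of the legacy
# messages database's migration history.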
15 | revision = "2c36fcc0b921" 16 | down_revision = None 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | op.create_table( 24 | "medias", 25 | sa.Column("id", sa.Integer(), nullable=False), 26 | sa.Column("media_id", sa.Integer(), nullable=True), 27 | sa.Column("post_id", sa.Integer(), nullable=False), 28 | sa.Column("link", sa.String(), nullable=True), 29 | sa.Column("directory", sa.String(), nullable=True), 30 | sa.Column("filename", sa.String(), nullable=True), 31 | sa.Column("size", sa.Integer(), nullable=True), 32 | sa.Column("media_type", sa.String(), nullable=True), 33 | sa.Column("downloaded", sa.Integer(), nullable=True), 34 | sa.Column("created_at", sa.DATETIME(), nullable=True), 35 | sa.PrimaryKeyConstraint("id"), 36 | sa.UniqueConstraint("media_id"), 37 | ) 38 | op.create_table( 39 | "messages", 40 | sa.Column("id", sa.Integer(), nullable=False), 41 | sa.Column("post_id", sa.Integer(), nullable=False), 42 | sa.Column("text", sa.String(), nullable=True), 43 | sa.Column("price", sa.Integer(), nullable=True), 44 | sa.Column("paid", sa.Integer(), nullable=True), 45 | sa.Column("created_at", sa.DATETIME(), nullable=True), 46 | sa.PrimaryKeyConstraint("id"), 47 | sa.UniqueConstraint("post_id"), 48 | ) 49 | # ### end Alembic commands ### 50 | 51 | 52 | def downgrade(): 53 | # ### commands auto generated by Alembic - please adjust! ### 54 | op.drop_table("messages") 55 | op.drop_table("medias") 56 | # ### end Alembic commands ### 57 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/7c1c6e101059_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 7c1c6e101059 5 | Revises: aeb9fe314556 6 | Create Date: 2021-05-31 02:56:29.998095 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "7c1c6e101059" 15 | down_revision = "aeb9fe314556" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("messages", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! 
### 44 | with op.batch_alter_table("messages", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/aeb9fe314556_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: aeb9fe314556 5 | Revises: d0118d8ec0b4 6 | Create Date: 2021-02-14 19:56:59.175268 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "aeb9fe314556" 15 | down_revision = "d0118d8ec0b4" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/bf20242a238f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: bf20242a238f 5 | Revises: 7c1c6e101059 6 | Create Date: 2021-06-20 12:42:35.578665 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "bf20242a238f" 15 | down_revision = "7c1c6e101059" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/alembic/versions/d0118d8ec0b4_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: d0118d8ec0b4 5 | Revises: 2c36fcc0b921 6 | Create Date: 2021-02-04 02:59:06.516503 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
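# Second revision in the legacy messages chain (revises 2c36fcc0b921);
# upgrade() below adds the medias.preview column.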
14 | revision = "d0118d8ec0b4" 15 | down_revision = "2c36fcc0b921" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/messages.py: -------------------------------------------------------------------------------- 1 | ### messages.py ### 2 | 3 | # type: ignore 4 | from sqlalchemy.orm import declarative_base 5 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 6 | ApiModel, 7 | ) 8 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 9 | TemplateMediaModel, 10 | ) 11 | 12 | Base = declarative_base() 13 | 14 | 15 | class api_table(ApiModel, Base): 16 | ApiModel.__tablename__ = "messages" 17 | 18 | 19 | class TemplateMediaModel(TemplateMediaModel, Base): 20 | pass 21 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/test_messages.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/messages/migration/test_messages.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. 
When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.posts.migration import ( 8 | posts, 9 | ) 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 17 | fileConfig(config.config_file_name, disable_existing_loggers=False) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | target_metadata = posts.Base.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 
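(Note: the placeholder sqlalchemy.url in alembic.ini is not used directly here; SqliteDatabase.run_migrations() overrides it on the Config at runtime, so the Engine below is created against the actual SQLite file being migrated.)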
57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure( 70 | connection=connection, 71 | target_metadata=target_metadata, 72 | render_as_batch=True, 73 | compare_type=True, 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/194e05269f09_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 194e05269f09 5 | Revises: 6 | Create Date: 2021-01-08 20:25:16.796179 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "194e05269f09" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("media_type", sa.String(), nullable=True), 32 | sa.Column("downloaded", sa.Integer(), nullable=True), 33 | sa.Column("created_at", sa.DATETIME(), nullable=True), 34 | sa.PrimaryKeyConstraint("id"), 35 | sa.UniqueConstraint("media_id"), 36 | ) 37 | op.create_table( 38 | "posts", 39 | sa.Column("id", sa.Integer(), nullable=False), 40 | sa.Column("post_id", sa.Integer(), nullable=False), 41 | sa.Column("text", sa.String(), nullable=True), 42 | sa.Column("price", sa.Integer(), nullable=True), 43 | sa.Column("paid", sa.Integer(), nullable=True), 44 | sa.Column("created_at", sa.DATETIME(), nullable=True), 45 | sa.PrimaryKeyConstraint("id"), 46 | sa.UniqueConstraint("post_id"), 47 | ) 48 | # ### end Alembic commands ### 49 | 50 | 51 | def downgrade(): 52 | # ### commands auto generated by Alembic - please adjust! ### 53 | op.drop_table("posts") 54 | op.drop_table("medias") 55 | # ### end Alembic commands ### 56 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/5b4bea08c27f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 5b4bea08c27f 5 | Revises: 194e05269f09 6 | Create Date: 2021-02-04 02:59:05.010106 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "5b4bea08c27f" 15 | down_revision = "194e05269f09" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/6b1b10eb67de_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 6b1b10eb67de 5 | Revises: 5b4bea08c27f 6 | Create Date: 2021-02-14 19:56:56.267261 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "6b1b10eb67de" 15 | down_revision = "5b4bea08c27f" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/990fc1108317_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 990fc1108317 5 | Revises: a918b6b05d2f 6 | Create Date: 2021-06-20 12:42:34.173918 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "990fc1108317" 15 | down_revision = "a918b6b05d2f" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/alembic/versions/a918b6b05d2f_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: a918b6b05d2f 5 | Revises: 6b1b10eb67de 6 | Create Date: 2021-05-31 02:56:28.192070 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "a918b6b05d2f" 15 | down_revision = "6b1b10eb67de" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("posts", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! 
### 44 | with op.batch_alter_table("posts", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/posts.py: -------------------------------------------------------------------------------- 1 | ### posts.py ### 2 | 3 | # type: ignore 4 | from sqlalchemy.orm import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | 15 | 16 | class api_table(ApiModel, Base): 17 | ApiModel.__tablename__ = "posts" 18 | 19 | 20 | class TemplateMediaModel(TemplateMediaModel, Base): 21 | pass 22 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/test_posts.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/posts/migration/test_posts.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. 
When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | [post_write_hooks] 42 | # post_write_hooks defines scripts or Python functions that are run 43 | # on newly generated revision scripts. See the documentation for further 44 | # detail and examples 45 | 46 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 47 | # hooks=black 48 | # black.type=console_scripts 49 | # black.entrypoint=black 50 | # black.options=-l 79 51 | 52 | # Logging configuration 53 | [loggers] 54 | keys = root,sqlalchemy,alembic 55 | 56 | [handlers] 57 | keys = console 58 | 59 | [formatters] 60 | keys = generic 61 | 62 | [logger_root] 63 | level = WARN 64 | handlers = console 65 | qualname = 66 | 67 | [logger_sqlalchemy] 68 | level = WARN 69 | handlers = 70 | qualname = sqlalchemy.engine 71 | 72 | [logger_alembic] 73 | level = INFO 74 | handlers = 75 | qualname = alembic 76 | 77 | [handler_console] 78 | class = StreamHandler 79 | args = (sys.stderr,) 80 | level = NOTSET 81 | formatter = generic 82 | 83 | [formatter_generic] 84 | format = %(levelname)-5.5s [%(name)s] %(message)s 85 | datefmt = %H:%M:%S 86 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/env.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | from logging.config import fileConfig 3 | 4 | from alembic import context 5 | from sqlalchemy import engine_from_config, pool 6 | 7 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.legacy_databases.stories.migration import ( 8 | stories, 9 | ) 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 17 | fileConfig(config.config_file_name, disable_existing_loggers=False) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | target_metadata = stories.Base.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | ) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 
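(Note: poolclass=pool.NullPool below disables connection pooling, so each migration run opens a fresh connection to the SQLite file and closes it when done.)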
57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix="sqlalchemy.", 65 | poolclass=pool.NullPool, 66 | ) 67 | 68 | with connectable.connect() as connection: 69 | context.configure( 70 | connection=connection, 71 | target_metadata=target_metadata, 72 | render_as_batch=True, 73 | compare_type=True, 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/29f675c35eee_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 29f675c35eee 5 | Revises: 3076beb33c1b 6 | Create Date: 2021-02-04 02:59:01.746229 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "29f675c35eee" 15 | down_revision = "3076beb33c1b" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("preview", sa.Integer(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("preview") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/2e4f8364f7e2_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 2e4f8364f7e2 5 | Revises: ebc3f4bb0782 6 | Create Date: 2021-05-31 02:56:17.448718 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
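# Note: the upgrade below relies on op.batch_alter_table() because SQLite cannot change an existing column's type in place; in batch mode Alembic copies the table into a new one with the updated schema and swaps it in (env.py enables this via render_as_batch=True).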
14 | revision = "2e4f8364f7e2" 15 | down_revision = "ebc3f4bb0782" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.alter_column( 24 | "created_at", 25 | existing_type=sa.DATETIME(), 26 | type_=sa.TIMESTAMP(), 27 | existing_nullable=True, 28 | ) 29 | 30 | with op.batch_alter_table("stories", schema=None) as batch_op: 31 | batch_op.add_column(sa.Column("archived", sa.Boolean(), nullable=True)) 32 | batch_op.alter_column( 33 | "created_at", 34 | existing_type=sa.DATETIME(), 35 | type_=sa.TIMESTAMP(), 36 | existing_nullable=True, 37 | ) 38 | 39 | # ### end Alembic commands ### 40 | 41 | 42 | def downgrade(): 43 | # ### commands auto generated by Alembic - please adjust! ### 44 | with op.batch_alter_table("stories", schema=None) as batch_op: 45 | batch_op.alter_column( 46 | "created_at", 47 | existing_type=sa.TIMESTAMP(), 48 | type_=sa.DATETIME(), 49 | existing_nullable=True, 50 | ) 51 | batch_op.drop_column("archived") 52 | 53 | with op.batch_alter_table("medias", schema=None) as batch_op: 54 | batch_op.alter_column( 55 | "created_at", 56 | existing_type=sa.TIMESTAMP(), 57 | type_=sa.DATETIME(), 58 | existing_nullable=True, 59 | ) 60 | 61 | # ### end Alembic commands ### 62 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/3076beb33c1b_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: 3076beb33c1b 5 | Revises: 6 | Create Date: 2021-01-08 23:09:26.868834 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "3076beb33c1b" 15 | down_revision = None 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.create_table( 23 | "medias", 24 | sa.Column("id", sa.Integer(), nullable=False), 25 | sa.Column("media_id", sa.Integer(), nullable=True), 26 | sa.Column("post_id", sa.Integer(), nullable=False), 27 | sa.Column("link", sa.String(), nullable=True), 28 | sa.Column("directory", sa.String(), nullable=True), 29 | sa.Column("filename", sa.String(), nullable=True), 30 | sa.Column("size", sa.Integer(), nullable=True), 31 | sa.Column("media_type", sa.String(), nullable=True), 32 | sa.Column("downloaded", sa.Integer(), nullable=True), 33 | sa.Column("created_at", sa.DATETIME(), nullable=True), 34 | sa.PrimaryKeyConstraint("id"), 35 | sa.UniqueConstraint("media_id"), 36 | ) 37 | op.create_table( 38 | "stories", 39 | sa.Column("id", sa.Integer(), nullable=False), 40 | sa.Column("post_id", sa.Integer(), nullable=False), 41 | sa.Column("text", sa.String(), nullable=True), 42 | sa.Column("price", sa.Integer(), nullable=True), 43 | sa.Column("paid", sa.Integer(), nullable=True), 44 | sa.Column("created_at", sa.DATETIME(), nullable=True), 45 | sa.PrimaryKeyConstraint("id"), 46 | sa.UniqueConstraint("post_id"), 47 | ) 48 | # ### end Alembic commands ### 49 | 50 | 51 | def downgrade(): 52 | # ### commands auto generated by Alembic - please adjust! 
### 53 | op.drop_table("stories") 54 | op.drop_table("medias") 55 | # ### end Alembic commands ### 56 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/e0c73f066547_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: e0c73f066547 5 | Revises: 2e4f8364f7e2 6 | Create Date: 2021-06-20 12:42:31.056065 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "e0c73f066547" 15 | down_revision = "2e4f8364f7e2" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("api_type", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("api_type") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/alembic/versions/ebc3f4bb0782_content.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | """content 3 | 4 | Revision ID: ebc3f4bb0782 5 | Revises: 29f675c35eee 6 | Create Date: 2021-02-14 19:56:54.040372 7 | 8 | """ 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "ebc3f4bb0782" 15 | down_revision = "29f675c35eee" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | with op.batch_alter_table("medias", schema=None) as batch_op: 23 | batch_op.add_column(sa.Column("linked", sa.String(), nullable=True)) 24 | 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade(): 29 | # ### commands auto generated by Alembic - please adjust! 
### 30 | with op.batch_alter_table("medias", schema=None) as batch_op: 31 | batch_op.drop_column("linked") 32 | 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/stories.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | ### stories.py ### 3 | 4 | from sqlalchemy.orm import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | 15 | 16 | class api_table(ApiModel, Base): 17 | ApiModel.__tablename__ = "stories" 18 | 19 | 20 | class TemplateMediaModel(TemplateMediaModel, Base): 21 | pass 22 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/test_stories.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/legacy_databases/stories/migration/test_stories.db -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/database_manager/connections/sqlite/models/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/api_model.py: -------------------------------------------------------------------------------- 1 | ### api_model.py ### 2 | 3 | from datetime import datetime 4 | from typing import cast 5 | 6 | import sqlalchemy 7 | from sqlalchemy.orm import declarative_base 8 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 9 | TemplateMediaModel, 10 | ) 11 | 12 | LegacyBase = declarative_base() 13 | 14 | 15 | class ApiModel: 16 | __tablename__ = "" 17 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 18 | post_id = cast( 19 | int, sqlalchemy.Column(sqlalchemy.Integer, unique=True, nullable=False) 20 | ) 21 | text = cast(str, sqlalchemy.Column(sqlalchemy.String)) 22 | price = cast(int, sqlalchemy.Column(sqlalchemy.Integer)) 23 | paid = sqlalchemy.Column(sqlalchemy.Integer) 24 | archived = cast(bool, sqlalchemy.Column(sqlalchemy.Boolean, default=False)) 25 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 26 | medias: list[TemplateMediaModel] = [] 27 | 28 | def legacy(self, table_name: str): 29 | class legacy_api_table(LegacyBase): 30 | __tablename__ = table_name 31 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 32 | text = sqlalchemy.Column(sqlalchemy.String) 33 | price = sqlalchemy.Column(sqlalchemy.Integer) 34 | paid = sqlalchemy.Column(sqlalchemy.Integer) 35 | created_at =
sqlalchemy.Column(sqlalchemy.DATETIME) 36 | 37 | return legacy_api_table 38 | 39 | def convert(self): 40 | item = self.__dict__ 41 | item.pop("_sa_instance_state") 42 | return item 43 | 44 | def find_media(self, media_id: int): 45 | for db_media in self.medias: 46 | if db_media.media_id == media_id: 47 | return db_media 48 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/media_model.py: -------------------------------------------------------------------------------- 1 | ### media_model.py ### 2 | 3 | from datetime import datetime 4 | from typing import Any, cast 5 | 6 | import sqlalchemy 7 | 8 | 9 | class TemplateMediaModel: 10 | __tablename__ = "medias" 11 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 12 | media_id = sqlalchemy.Column(sqlalchemy.Integer) 13 | post_id = sqlalchemy.Column(sqlalchemy.Integer, nullable=False) 14 | link = cast(str, sqlalchemy.Column(sqlalchemy.String)) 15 | directory = cast(str, sqlalchemy.Column(sqlalchemy.String)) 16 | filename = cast(str, sqlalchemy.Column(sqlalchemy.String)) 17 | size = cast(int | None, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 18 | api_type = cast(str, sqlalchemy.Column(sqlalchemy.String)) 19 | media_type = sqlalchemy.Column(sqlalchemy.String) 20 | preview = sqlalchemy.Column(sqlalchemy.Integer, default=0) 21 | linked = sqlalchemy.Column(sqlalchemy.String, default=None) 22 | downloaded = cast(bool, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 23 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 24 | 25 | def legacy(self, Base: Any): 26 | class legacy_media_table(Base): 27 | __tablename__ = "medias" 28 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 29 | post_id = sqlalchemy.Column(sqlalchemy.Integer) 30 | link = sqlalchemy.Column(sqlalchemy.String) 31 | directory = sqlalchemy.Column(sqlalchemy.String) 32 | filename = sqlalchemy.Column(sqlalchemy.String) 33 | size = sqlalchemy.Column(sqlalchemy.Integer, default=None) 34 | media_type = sqlalchemy.Column(sqlalchemy.String) 35 | downloaded = sqlalchemy.Column(sqlalchemy.Integer, default=0) 36 | created_at = sqlalchemy.Column(sqlalchemy.DATETIME) 37 | 38 | return legacy_media_table 39 | 40 | def legacy_2(self, Base: Any): 41 | class legacy_media_table(Base): 42 | __tablename__ = "medias" 43 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 44 | media_id = sqlalchemy.Column(sqlalchemy.Integer, unique=True) 45 | post_id = sqlalchemy.Column(sqlalchemy.Integer, nullable=False) 46 | link = cast(str, sqlalchemy.Column(sqlalchemy.String)) 47 | directory = cast(str, sqlalchemy.Column(sqlalchemy.String)) 48 | filename = cast(str, sqlalchemy.Column(sqlalchemy.String)) 49 | size = cast(int, sqlalchemy.Column(sqlalchemy.Integer, default=None)) 50 | media_type = sqlalchemy.Column(sqlalchemy.String) 51 | preview = sqlalchemy.Column(sqlalchemy.Integer, default=0) 52 | linked = sqlalchemy.Column(sqlalchemy.String, default=None) 53 | downloaded = cast(bool, sqlalchemy.Column(sqlalchemy.Integer, default=0)) 54 | created_at = cast(datetime, sqlalchemy.Column(sqlalchemy.TIMESTAMP)) 55 | 56 | return legacy_media_table 57 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/models/user_database.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, cast 2 | 3 |
import sqlalchemy 4 | from sqlalchemy.orm.decl_api import declarative_base 5 | 6 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.api_model import ( 7 | ApiModel, 8 | ) 9 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 10 | TemplateMediaModel, 11 | ) 12 | 13 | Base = declarative_base() 14 | LegacyBase = declarative_base() 15 | 16 | 17 | class profiles_table(Base): 18 | __tablename__ = "profiles" 19 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 20 | user_id = cast(int, sqlalchemy.Column(sqlalchemy.Integer, nullable=False)) 21 | username = sqlalchemy.Column(sqlalchemy.String, unique=True, nullable=False) 22 | 23 | 24 | class stories_table(ApiModel, Base): 25 | ApiModel.__tablename__ = "stories" 26 | 27 | 28 | class posts_table(ApiModel, Base): 29 | ApiModel.__tablename__ = "posts" 30 | 31 | 32 | class messages_table(ApiModel, Base): 33 | ApiModel.__tablename__ = "messages" 34 | user_id = cast(Optional[int], sqlalchemy.Column(sqlalchemy.Integer)) 35 | 36 | class api_legacy_table(ApiModel, LegacyBase): 37 | pass 38 | 39 | 40 | class products_table(ApiModel, Base): 41 | ApiModel.__tablename__ = "products" 42 | title = sqlalchemy.Column(sqlalchemy.String) 43 | 44 | 45 | class others_table(ApiModel, Base): 46 | ApiModel.__tablename__ = "others" 47 | 48 | 49 | # class comments_table(api_table,Base): 50 | # api_table.__tablename__ = "comments" 51 | 52 | 53 | class media_table(TemplateMediaModel, Base): 54 | class media_legacy_table(TemplateMediaModel().legacy_2(LegacyBase), LegacyBase): 55 | pass 56 | 57 | 58 | def table_picker(table_name: str, legacy: bool = False): 59 | match table_name: 60 | case "Stories" | "Highlights": 61 | table = stories_table 62 | case "Posts": 63 | table = posts_table 64 | case "Messages" | "Chats" | "MassMessages": 65 | table = messages_table if not legacy else messages_table().api_legacy_table 66 | case "Products": 67 | table = products_table 68 | case "Others": 69 | table = others_table 70 | case _: 71 | raise Exception(f'"{table_name}" is an invalid table name') 72 | return table 73 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/connections/sqlite/sqlite_database.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import ultima_scraper_api 6 | from alembic import command 7 | from alembic.config import Config 8 | from alembic.migration import MigrationContext 9 | from sqlalchemy import create_engine, func 10 | from sqlalchemy.orm import DeclarativeBase, scoped_session, sessionmaker 11 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models import ( 12 | user_database, 13 | ) 14 | 15 | if TYPE_CHECKING: 16 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 17 | ContentMetadata, 18 | ) 19 | 20 | user_types = ultima_scraper_api.user_types 21 | 22 | 23 | class DBCollection(object): 24 | def __init__(self) -> None: 25 | self.user_database = user_database 26 | 27 | def database_picker(self, database_name: str): 28 | match database_name: 29 | case "user_data": 30 | database = self.user_database 31 | case _: 32 | raise Exception(f'"{database_name}" is an invalid database name') 33 | return database 34 | 35 | 36 | class SqliteDatabase(DeclarativeBase): 37 | def __init__(self) 
-> None: 38 | self.name = Path() 39 | self.info = "" 40 | self.session_factory = sessionmaker() 41 | self.session = self.create_session() 42 | self.root_directory = Path(__file__).parent 43 | self.alembic_directory = Path() 44 | self.migration_directory = Path() 45 | 46 | def init_db(self, name: Path, legacy: bool = False): 47 | self.name: Path = name 48 | self.info = f"sqlite:///{name}" 49 | self.session_factory = sessionmaker(bind=self.create_engine(), autocommit=False) 50 | self.session = self.create_session() 51 | self.alembic_directory = Path(__file__).parent.joinpath( 52 | f"{'databases' if not legacy else 'legacy_databases'}", 53 | self.name.stem.lower(), 54 | "alembic", 55 | ) 56 | self.migration_directory = self.alembic_directory.parent.joinpath("migration") 57 | return self 58 | 59 | def create_engine(self): 60 | return create_engine( 61 | self.info, 62 | connect_args={}, 63 | ) 64 | 65 | def create_session(self): 66 | return scoped_session(self.session_factory) 67 | 68 | def execute(self, statement: Any): 69 | result = self.session.execute(statement) 70 | return result 71 | 72 | def generate_migration(self): 73 | if not self.session.bind: 74 | return 75 | conn = self.session.bind.engine.connect() 76 | context = MigrationContext.configure(conn) 77 | current_rev = context.get_current_revision() 78 | alembic_cfg = Config(self.migration_directory.joinpath("alembic.ini")) 79 | alembic_cfg.set_main_option( 80 | "script_location", 81 | self.migration_directory.joinpath("alembic").as_posix(), 82 | ) 83 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 84 | if not current_rev: 85 | _ggg = command.revision(alembic_cfg, autogenerate=True) 86 | else: 87 | _ggg = command.revision(alembic_cfg, autogenerate=True, head=current_rev) 88 | self.run_migrations() 89 | return True 90 | 91 | def run_migrations(self, legacy: bool = False) -> None: 92 | while True: 93 | try: 94 | migration_directory = ( 95 | self.alembic_directory.parent.joinpath("migration") 96 | if legacy 97 | else self.migration_directory 98 | ) 99 | 100 | alembic_cfg = Config(migration_directory.joinpath("alembic.ini")) 101 | alembic_cfg.set_main_option( 102 | "script_location", 103 | migration_directory.joinpath("alembic").as_posix(), 104 | ) 105 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 106 | command.upgrade(alembic_cfg, "head") 107 | break 108 | except Exception as e: 109 | print(e) 110 | pass 111 | 112 | def revert_migration(self): 113 | while True: 114 | try: 115 | alembic_cfg = Config(self.migration_directory.joinpath("alembic.ini")) 116 | alembic_cfg.set_main_option( 117 | "script_location", 118 | self.migration_directory.joinpath("alembic").as_posix(), 119 | ) 120 | alembic_cfg.set_main_option("sqlalchemy.url", self.info) 121 | command.downgrade(alembic_cfg, "-1") 122 | break 123 | except Exception as e: 124 | print(e) 125 | pass 126 | 127 | def import_metadata( 128 | self, datas: list["ContentMetadata"], api_type: str | None = None 129 | ): 130 | database_path = self.name 131 | database_path.parent.mkdir(parents=True, exist_ok=True) 132 | self.run_migrations() 133 | db_collection = DBCollection() 134 | database = db_collection.database_picker(database_path.stem) 135 | database_session = self.session 136 | for post in datas: 137 | if post.api_type: 138 | api_type = post.api_type 139 | api_table = database.table_picker(api_type) 140 | if not api_table: 141 | return 142 | post_id = post.content_id 143 | post_created_at_string = post.created_at 144 | date_object = None 145 | if post_created_at_string: 
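# created_at may arrive as an ISO-8601 string or in the legacy "%d-%m-%Y %H:%M:%S" format; try ISO first and fall back to strptime.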
146 | try: 147 | date_object = datetime.fromisoformat(post_created_at_string) 148 | pass 149 | except Exception as _e: 150 | date_object = datetime.strptime( 151 | post_created_at_string, "%d-%m-%Y %H:%M:%S" 152 | ) 153 | pass 154 | result = database_session.query(api_table) 155 | post_db = result.filter_by(post_id=post_id).first() 156 | if not post_db: 157 | post_db = api_table() 158 | else: 159 | pass 160 | if api_type == "Messages": 161 | post_db.user_id = post.user_id 162 | post_db.post_id = post_id 163 | post_db.text = post.text 164 | post_db.price = post.price 165 | post_db.paid = post.paid 166 | post_db.archived = post.archived 167 | if date_object: 168 | post_db.created_at = date_object 169 | database_session.add(post_db) 170 | for media in post.medias: 171 | if media.media_type == "Texts": 172 | continue 173 | media_created_at_string = media.created_at 174 | if not isinstance(media_created_at_string, datetime): 175 | if isinstance(media_created_at_string, int): 176 | date_object = datetime.fromtimestamp(media_created_at_string) 177 | else: 178 | try: 179 | date_object = datetime.fromisoformat( 180 | media_created_at_string 181 | ) 182 | except Exception as _e: 183 | date_object = datetime.strptime( 184 | media_created_at_string, "%d-%m-%Y %H:%M:%S" 185 | ) 186 | pass 187 | media_id = media.id 188 | result = database_session.query(database.media_table) 189 | media_db = result.filter_by(post_id=post_id, media_id=media_id).first() 190 | if not media_db: 191 | media_db = result.filter_by( 192 | filename=media.filename, created_at=date_object 193 | ).first() 194 | if not media_db: 195 | media_db = database.media_table() 196 | else: 197 | pass 198 | if ( 199 | post.__legacy__ 200 | and media_db.media_id != media.id 201 | and media_db.media_id 202 | ): 203 | media_id = media_db.media_id 204 | 205 | media_db.media_id = media_id 206 | media_db.post_id = post_id 207 | media_db.size = media.size if media_db.size is None else media_db.size 208 | media_db.link = media.urls[0] if media.urls else None 209 | media_db.preview = media.preview 210 | media_db.directory = ( 211 | media.directory.as_posix() if media.directory else None 212 | ) 213 | media_db.filename = media.filename 214 | media_db.api_type = api_type 215 | media_db.media_type = media.media_type 216 | media_db.linked = media.linked 217 | if date_object: 218 | media_db.created_at = date_object 219 | database_session.add(media_db) 220 | database_session.commit() 221 | database_session.close() 222 | return True 223 | 224 | def legacy_sqlite_updater( 225 | self, 226 | api_type: str, 227 | subscription: user_types, 228 | ): 229 | final_result: list[dict[str, Any]] = [] 230 | legacy_metadata_path = self.name 231 | if legacy_metadata_path.exists(): 232 | self.run_migrations(legacy=True) 233 | database_name = "user_data" 234 | database_session = self.session 235 | db_collection = DBCollection() 236 | database = db_collection.database_picker(database_name) 237 | if database: 238 | if api_type == "Messages": 239 | api_table_table = database.table_picker(api_type, True) 240 | else: 241 | api_table_table = database.table_picker(api_type) 242 | media_table_table = database.media_table.media_legacy_table 243 | if api_table_table: 244 | result = database_session.query(api_table_table).all() 245 | result2 = database_session.query(media_table_table).all() 246 | for item in result: 247 | for item2 in result2: 248 | if item.post_id != item2.post_id: 249 | continue 250 | item.medias.append(item2) 251 | item.user_id = subscription.id 252 |
final_result.append(item) 253 | database_session.close() 254 | return final_result 255 | 256 | def find_table(self, name: str): 257 | table = [x for x in self.metadata.sorted_tables if x.name == name] 258 | if table: 259 | return table[0] 260 | 261 | def get_count(self, q: Any): 262 | count_q = q.statement.with_only_columns(func.count()).order_by(None) 263 | count: int = q.session.execute(count_q).scalar() 264 | return count 265 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/database_manager/database_manager.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.sqlite_database import ( 4 | SqliteDatabase, 5 | ) 6 | 7 | 8 | class DatabaseManager: 9 | def __init__(self) -> None: 10 | self.active_db: SqliteDatabase | None = None 11 | 12 | def get_sqlite_db(self, path: Path, legacy: bool = False): 13 | sqlite_db = SqliteDatabase().init_db(path, legacy) 14 | self.active_db = sqlite_db 15 | return sqlite_db 16 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/datascraper_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascraper_manager.py: -------------------------------------------------------------------------------- 1 | import ultima_scraper_api 2 | from ultima_scraper_api import SUPPORTED_SITES 3 | from ultima_scraper_api.apis.onlyfans import onlyfans 4 | from ultima_scraper_collection import datascraper_types 5 | from ultima_scraper_collection.config import UltimaScraperCollectionConfig 6 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 7 | FanslyDataScraper, 8 | ) 9 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 10 | OnlyFansDataScraper, 11 | ) 12 | from ultima_scraper_collection.managers.option_manager import OptionManager 13 | from ultima_scraper_collection.managers.server_manager import ServerManager 14 | 15 | 16 | class DataScraperManager: 17 | def __init__( 18 | self, server_manager: ServerManager, config: UltimaScraperCollectionConfig 19 | ) -> None: 20 | self.datascrapers: dict[str, datascraper_types] = {} 21 | self.server_manager: ServerManager = server_manager 22 | self.config = config 23 | for site_name in SUPPORTED_SITES: 24 | datascraper = self.add_datascraper( 25 | ultima_scraper_api.select_api(site_name, config), 26 | OptionManager(), 27 | self.server_manager, 28 | ) 29 | datascraper.filesystem_manager.activate_directory_manager( 30 | self.get_site_config(site_name) 31 | ) 32 | 33 | def get_site_config(self, name: str): 34 | return getattr(self.config.site_apis, name.lower()) 35 | 36 | def find_datascraper( 37 | self, 38 | site_name: str, 39 | ): 40 | return self.datascrapers.get(site_name.lower()) 41 | 42 | def select_datascraper( 43 | self, 44 | site_name: str, 45 | ): 46 | return self.datascrapers.get(site_name.lower()) 47 | 48 | def add_datascraper( 49 | self, 50 | api: ultima_scraper_api.api_types, 51 | option_manager: OptionManager, 52 | 
server_manager: ServerManager, 53 | ): 54 | site_settings = self.get_site_config(api.site_name) 55 | if isinstance(api, onlyfans.OnlyFansAPI): 56 | datascraper = OnlyFansDataScraper( 57 | api, option_manager, server_manager, site_settings 58 | ) 59 | else: 60 | datascraper = FanslyDataScraper( 61 | api, option_manager, server_manager, site_settings 62 | ) 63 | self.datascrapers[api.site_name.lower()] = datascraper 64 | return datascraper 65 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/datascraper_manager/datascrapers/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/fansly.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import TYPE_CHECKING, Any 3 | 4 | from ultima_scraper_api.apis.fansly.fansly import FanslyAPI 5 | from ultima_scraper_renamer.reformat import ReformatManager 6 | 7 | from ultima_scraper_collection.config import Sites 8 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 9 | ApiExtractor, 10 | ContentMetadata, 11 | ) 12 | from ultima_scraper_collection.managers.option_manager import OptionManager 13 | from ultima_scraper_collection.managers.server_manager import ServerManager 14 | from ultima_scraper_collection.modules.module_streamliner import StreamlinedDatascraper 15 | 16 | if TYPE_CHECKING: 17 | from ultima_scraper_api.apis.fansly.classes.auth_model import AuthModel 18 | from ultima_scraper_api.apis.fansly.classes.message_model import create_message 19 | from ultima_scraper_api.apis.fansly.classes.post_model import create_post 20 | from ultima_scraper_api.apis.fansly.classes.story_model import create_story 21 | from ultima_scraper_api.apis.fansly.classes.user_model import create_user 22 | 23 | 24 | class FanslyDataScraper(StreamlinedDatascraper): 25 | def __init__( 26 | self, 27 | api: FanslyAPI, 28 | option_manager: OptionManager, 29 | server_manager: ServerManager, 30 | site_config: Sites.FanslyAPIConfig, 31 | ) -> None: 32 | self.api = api 33 | self.option_manager = option_manager 34 | self.site_config = site_config 35 | StreamlinedDatascraper.__init__(self, self, server_manager) 36 | 37 | # Scrapes the API for content 38 | async def media_scraper( 39 | self, 40 | content_result: "create_story | create_post | create_message", 41 | subscription: "create_user", 42 | api_type: str, 43 | ) -> dict[str, Any]: 44 | authed = subscription.get_authed() 45 | site_config = self.site_config 46 | new_set: dict[str, Any] = {"content": []} 47 | directories: list[Path] = [] 48 | if api_type == "Stories": 49 | pass 50 | if api_type == "Archived": 51 | pass 52 | if api_type == "Posts": 53 | pass 54 | if api_type == "Messages": 55 | pass 56 | 57 | content_metadata = ContentMetadata(content_result.id, api_type) 58 | await content_metadata.resolve_extractor(ApiExtractor(content_result)) 59 | for asset in content_metadata.medias: 60 | if asset.urls: 61 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 62 | reformat_item = reformat_manager.prepare_reformat(asset) 63 | file_directory = 
reformat_item.reformat( 64 | site_config.download_setup.directory_format 65 | ) 66 | reformat_item.directory = file_directory 67 | file_path = reformat_item.reformat( 68 | site_config.download_setup.filename_format 69 | ) 70 | asset.directory = file_directory 71 | asset.filename = file_path.name 72 | 73 | if file_directory not in directories: 74 | directories.append(file_directory) 75 | new_set["content"].append(content_metadata) 76 | new_set["directories"] = directories 77 | return new_set 78 | 79 | async def get_all_stories(self, subscription: "create_user"): 80 | """ 81 | get_all_stories(subscription: create_user) 82 | 83 | This function returns a list of all stories and archived stories from the given subscription. 84 | 85 | Arguments: 86 | subscription (create_user): An instance of the create_user class. 87 | 88 | Returns: 89 | list[create_story]: A list containing all stories and archived stories from the subscription. 90 | """ 91 | master_set: list["create_story"] = [] 92 | master_set.extend(await subscription.get_stories()) 93 | # master_set.extend(await subscription.get_archived_stories()) 94 | return master_set 95 | 96 | async def get_all_posts(self, subscription: "create_user"): 97 | temp_master_set = await subscription.get_posts() 98 | collections = await subscription.get_collections() 99 | for collection in collections: 100 | temp_master_set.append( 101 | await subscription.get_collection_content(collection) 102 | ) 103 | return temp_master_set 104 | 105 | async def get_all_subscriptions( 106 | self, 107 | authed: "AuthModel", 108 | identifiers: list[int | str] = [], 109 | refresh: bool = True, 110 | ): 111 | """ 112 | get_all_subscriptions(authed: AuthModel, identifiers: list[int | str] = [], refresh: bool = True) 113 | 114 | This function returns a list of all subscriptions, including both subscriptions and followings, 115 | from the given authenticated user. 116 | 117 | Arguments: 118 | authed (AuthModel): An instance of the AuthModel class. 119 | identifiers (list[int | str], optional): A list of identifiers (username or id) for the subscriptions. Defaults to an empty list. 120 | refresh (bool, optional): A flag indicating whether to refresh the list of subscriptions. Defaults to True. 121 | 122 | Returns: 123 | list[create_subscription]: A list of all subscriptions, including both subscriptions and followings, from the authenticated user. 
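The returned list is sorted by subscription end date (ends_at), soonest-expiring first.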
124 | """ 125 | authed.followed_users = await authed.get_followings(identifiers=identifiers) 126 | subscriptions = await authed.get_subscriptions( 127 | identifiers=identifiers, refresh=refresh, sub_type="active" 128 | ) 129 | subscriptions.sort(key=lambda x: x.ends_at) 130 | return subscriptions 131 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/datascraper_manager/datascrapers/onlyfans.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from datetime import datetime, timezone 3 | from pathlib import Path 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from sqlalchemy import select 7 | from sqlalchemy.orm import joinedload 8 | from ultima_scraper_api.apis.onlyfans.classes.mass_message_model import MassMessageModel 9 | from ultima_scraper_api.apis.onlyfans.onlyfans import OnlyFansAPI 10 | from ultima_scraper_collection.config import Sites 11 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 12 | ApiExtractor, 13 | ContentMetadata, 14 | ) 15 | from ultima_scraper_collection.managers.option_manager import OptionManager 16 | from ultima_scraper_collection.managers.server_manager import ServerManager 17 | from ultima_scraper_collection.modules.module_streamliner import StreamlinedDatascraper 18 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import PostModel 19 | from ultima_scraper_renamer.reformat import ReformatManager 20 | 21 | if TYPE_CHECKING: 22 | from ultima_scraper_api.apis.onlyfans.classes.auth_model import OnlyFansAuthModel 23 | from ultima_scraper_api.apis.onlyfans.classes.hightlight_model import ( 24 | create_highlight, 25 | ) 26 | from ultima_scraper_api.apis.onlyfans.classes.message_model import create_message 27 | from ultima_scraper_api.apis.onlyfans.classes.post_model import create_post 28 | from ultima_scraper_api.apis.onlyfans.classes.story_model import create_story 29 | from ultima_scraper_api.apis.onlyfans.classes.user_model import create_user 30 | 31 | 32 | class OnlyFansDataScraper(StreamlinedDatascraper): 33 | def __init__( 34 | self, 35 | api: OnlyFansAPI, 36 | option_manager: OptionManager, 37 | server_manager: ServerManager, 38 | site_config: Sites.OnlyFansAPIConfig, 39 | ) -> None: 40 | self.api = api 41 | self.option_manager = option_manager 42 | self.site_config = site_config 43 | StreamlinedDatascraper.__init__(self, self, server_manager) 44 | 45 | # Scrapes the API for content 46 | async def media_scraper( 47 | self, 48 | content_result: "create_story | create_post | create_message|MassMessageModel", 49 | subscription: "create_user", 50 | api_type: str, 51 | ) -> dict[str, Any]: 52 | api_type = self.api.convert_api_type_to_key(content_result) 53 | authed = subscription.get_authed() 54 | site_config = self.site_config 55 | new_set: dict[str, Any] = {"content": []} 56 | directories: list[Path] = [] 57 | if api_type == "Stories": 58 | pass 59 | if api_type == "Posts": 60 | pass 61 | if api_type == "Messages": 62 | pass 63 | 64 | content_metadata = ContentMetadata( 65 | content_result.id, api_type, self.resolve_content_manager(subscription) 66 | ) 67 | 68 | await content_metadata.resolve_extractor(ApiExtractor(content_result)) 69 | for asset in content_metadata.medias: 70 | if asset.urls: 71 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 72 | reformat_item = reformat_manager.prepare_reformat(asset) 73 | if reformat_item.api_type == "Messages": 74 | if ( 75 
| content_metadata.queue_id 76 | and content_metadata.__soft__.is_mass_message() 77 | ): 78 | reformat_item.api_type = "MassMessages" 79 | file_directory = reformat_item.reformat( 80 | site_config.download_setup.directory_format 81 | ) 82 | reformat_item.directory = file_directory 83 | file_path = reformat_item.reformat( 84 | site_config.download_setup.filename_format 85 | ) 86 | asset.directory = file_directory 87 | asset.filename = file_path.name 88 | 89 | if file_directory not in directories: 90 | directories.append(file_directory) 91 | new_set["content"].append(content_metadata) 92 | new_set["directories"] = directories 93 | return new_set 94 | 95 | async def get_all_stories(self, subscription: "create_user"): 96 | """ 97 | get_all_stories(subscription: create_user) 98 | 99 | This function returns a list of all stories and highlights from the given subscription. 100 | 101 | Arguments: 102 | subscription (create_user): An instance of the create_user class. 103 | 104 | Returns: 105 | list[create_highlight | create_story]: A list containing all stories and highlights from the subscription. 106 | """ 107 | master_set: list[create_highlight | create_story] = [] 108 | master_set.extend(await subscription.get_stories()) 109 | master_set.extend(await subscription.get_archived_stories()) 110 | highlights = await subscription.get_highlights() 111 | valid_highlights: list[create_highlight | create_story] = [] 112 | for highlight in highlights: 113 | resolved_highlight = await subscription.get_highlights( 114 | hightlight_id=highlight.id 115 | ) 116 | valid_highlights.extend(resolved_highlight) 117 | master_set.extend(valid_highlights) 118 | return master_set 119 | 120 | async def get_all_posts(self, performer: "create_user") -> list["create_post"]: 121 | async with self.get_archive_db_api().create_site_api( 122 | performer.get_api().site_name 123 | ) as db_site_api: 124 | after_date = None 125 | # db_performer = await db_site_api.get_user(performer.id) 126 | # await db_performer.awaitable_attrs._posts 127 | # result = await db_performer.last_subscription_downloaded_at() 128 | # if result: 129 | # after_date = result.downloaded_at 130 | 131 | posts = await performer.get_posts(after_date=after_date) 132 | archived_posts = await performer.get_posts(label="archived") 133 | private_archived_posts = await performer.get_posts(label="private_archived") 134 | 135 | session = db_site_api.get_session() 136 | posts_with_comments = ( 137 | select(PostModel) 138 | .options(joinedload(PostModel.comments)) 139 | .filter(PostModel.comments.any()) 140 | .where(PostModel.user_id == performer.id) 141 | .order_by(PostModel.created_at.desc()) 142 | ) 143 | results = await session.scalars(posts_with_comments) 144 | db_posts = results.unique().all() 145 | threshold_date = ( 146 | db_posts[0].created_at 147 | if db_posts 148 | else datetime.min.replace(tzinfo=timezone.utc) 149 | ) 150 | tasks = [ 151 | x.get_comments() 152 | for x in performer.scrape_manager.scraped.Posts.values() 153 | if x.created_at > threshold_date 154 | ] 155 | await asyncio.gather(*tasks) 156 | return posts + archived_posts + private_archived_posts 157 | 158 | async def get_all_subscriptions( 159 | self, 160 | authed: "OnlyFansAuthModel", 161 | identifiers: list[int | str] = [], 162 | refresh: bool = True, 163 | ): 164 | """ 165 | get_all_subscriptions(authed: AuthModel, identifiers: list[int | str] = [], refresh: bool = True) 166 | 167 | This function returns a list of all subscriptions from the given authenticated user. 
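        (Unlike the Fansly implementation, followings are not fetched here; only
        active subscriptions are returned, sorted by their expiry date.)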
168 | 169 | Arguments: 170 | authed (AuthModel): An instance of the AuthModel class. 171 | identifiers (list[int | str], optional): A list of identifiers (username or id) for the subscriptions. Defaults to an empty list. 172 | refresh (bool, optional): A flag indicating whether to refresh the list of subscriptions. Defaults to True. 173 | 174 | Returns: 175 | list[create_subscription]: A list of all subscriptions, sorted by expiredAt, from the authenticated user. 176 | """ 177 | subscriptions = await authed.get_subscriptions( 178 | identifiers=identifiers, refresh=refresh, sub_type="active" 179 | ) 180 | subscriptions.sort(key=lambda x: x.subscribed_by_expire_date) 181 | return subscriptions 182 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/download_manager.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import copy 3 | from pathlib import Path 4 | from urllib.parse import urlparse 5 | 6 | import ffmpeg 7 | from aiohttp import ClientResponse 8 | from alive_progress import alive_bar 9 | from ultima_scraper_api import auth_types 10 | from ultima_scraper_api.apis.onlyfans.classes.mass_message_model import MassMessageModel 11 | from ultima_scraper_api.helpers import main_helper 12 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 13 | MediaModel, 14 | MessageModel, 15 | ) 16 | from ultima_scraper_renamer.reformat import ReformatManager 17 | 18 | from ultima_scraper_collection.managers.database_manager.connections.sqlite.models.media_model import ( 19 | TemplateMediaModel, 20 | ) 21 | from ultima_scraper_collection.managers.filesystem_manager import FilesystemManager 22 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 23 | MediaMetadata, 24 | ) 25 | 26 | 27 | class DownloadManager: 28 | def __init__( 29 | self, 30 | authed: auth_types, 31 | filesystem_manager: FilesystemManager, 32 | media_set: set[MediaMetadata] = set(), 33 | reformat: bool = True, 34 | ) -> None: 35 | self.authed = authed 36 | self.filesystem_manager = filesystem_manager 37 | self.auth_session = self.authed.auth_session 38 | self.requester = self.authed.get_requester() 39 | self.content_list: set[MediaMetadata] = media_set 40 | self.errors: list[TemplateMediaModel] = [] 41 | self.reformat = reformat 42 | self.reformat_manager = ReformatManager(self.authed, filesystem_manager) 43 | self.bar = None 44 | 45 | async def bulk_download(self): 46 | final_list = [self.download(media_item) for media_item in self.content_list] 47 | if final_list: 48 | with alive_bar(len(self.content_list)) as bar: 49 | self.bar = bar 50 | _result = await asyncio.gather(*final_list, return_exceptions=True) 51 | 52 | async def drm_download(self, download_item: MediaMetadata): 53 | content_metadata = download_item.__content_metadata__ 54 | authed = self.authed 55 | reformat_manager = ReformatManager(authed, self.filesystem_manager) 56 | assert reformat_manager.filesystem_manager.directory_manager 57 | site_config = reformat_manager.filesystem_manager.directory_manager.site_config 58 | drm = authed.drm 59 | media_item = download_item.__raw__ 60 | assert drm and media_item 61 | mpd = await drm.get_mpd(media_item) 62 | pssh = await drm.get_pssh(mpd) 63 | responses: list[ClientResponse] = [] 64 | 65 | if pssh: 66 | if content_metadata: 67 | soft_data = content_metadata.__soft__ 68 | raw_data = soft_data.__raw__.copy() 69 | if ( 70 | isinstance(soft_data, 
MassMessageModel) 71 | and soft_data 72 | and soft_data.author.is_authed_user() 73 | ): 74 | raw_data["responseType"] = "" 75 | else: 76 | raw_data = {"responseType": ""} 77 | license = await drm.get_license(raw_data, media_item, pssh) 78 | keys = await drm.get_keys(license) 79 | content_key = keys[-1] 80 | key = f"{content_key.kid.hex}:{content_key.key.hex()}" 81 | download_item.key = key 82 | video_url, audio_url = [ 83 | drm.get_video_url(mpd, media_item), 84 | drm.get_audio_url(mpd, media_item), 85 | ] 86 | download_item.urls = [video_url] 87 | reformat_item = reformat_manager.prepare_reformat(download_item) 88 | file_directory = reformat_item.reformat( 89 | site_config.download_setup.directory_format 90 | ) 91 | reformat_item.directory = file_directory 92 | file_path = reformat_item.reformat( 93 | site_config.download_setup.filename_format 94 | ) 95 | download_item.directory = file_directory 96 | download_item.filename = file_path.name 97 | for media_url in video_url, audio_url: 98 | drm_download_item = copy.copy(download_item) 99 | drm_download_item = reformat_manager.drm_format( 100 | media_url, drm_download_item 101 | ) 102 | 103 | signature_str = await drm.get_signature(media_item) 104 | response = await authed.auth_session.request( 105 | media_url, premade_settings="", custom_cookies=signature_str 106 | ) 107 | responses.append(response) 108 | return responses 109 | 110 | async def download(self, download_item: MediaMetadata): 111 | if not download_item.urls: 112 | return 113 | attempt = 0 114 | db_media = download_item.__db_media__ 115 | assert db_media 116 | await db_media.awaitable_attrs.content_media_assos 117 | content = download_item.get_content_metadata() 118 | if content: 119 | db_content = content.__db_content__ 120 | assert db_content 121 | if isinstance(db_content, MessageModel): 122 | if db_content.queue_id: 123 | try: 124 | db_filepath = db_media.find_filepath( 125 | (db_content.queue_id, "MassMessages") 126 | ) 127 | except Exception as _e: 128 | pass 129 | pass 130 | else: 131 | db_filepath = db_media.find_filepath() 132 | else: 133 | db_filepath = db_media.find_filepath() 134 | else: 135 | db_filepath = db_media.find_filepath() 136 | pass 137 | matches = ["us", "uk", "ca", "ca2", "de"] 138 | p_url = urlparse(download_item.urls[0]) 139 | assert p_url.hostname 140 | subdomain = p_url.hostname.split(".")[0] 141 | if any(subdomain in nm for nm in matches): 142 | return 143 | 144 | authed = self.authed 145 | authed_drm = authed.drm 146 | 147 | async with self.auth_session.semaphore: 148 | while attempt < self.auth_session.get_session_manager().max_attempts + 1: 149 | try: 150 | if download_item.drm: 151 | if not authed_drm: 152 | break 153 | responses = await self.drm_download(download_item) 154 | else: 155 | responses = [ 156 | await self.requester.request(download_item.urls[0]) 157 | ] 158 | if all(response.status != 200 for response in responses): 159 | attempt += 1 160 | continue 161 | if not download_item.directory: 162 | raise Exception( 163 | f"{download_item.id} has no directory\n {download_item}" 164 | ) 165 | decrypted_media_paths: list[Path] = [] 166 | final_size = 0 167 | error = None 168 | for response in responses: 169 | if download_item.drm and await self.drm_check_downloaded( 170 | download_item 171 | ): 172 | continue 173 | download_path, error = await self.writer( 174 | response, download_item, encrypted=bool(download_item.key) 175 | ) 176 | if error: 177 | attempt += 1 178 | break 179 | if authed_drm and download_item.drm and download_path: 180 | 
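                            # Each DRM response (the video track, then the audio
                            # track) is written to disk still encrypted; here it is
                            # decrypted with the key resolved in drm_download(), and
                            # the decrypted parts are collected so format_media()
                            # can mux them back into a single file further below.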
output_filepath = authed_drm.decrypt_file( 181 | download_path, download_item.key 182 | ) 183 | if not output_filepath: 184 | raise Exception("No output_filepath") 185 | decrypted_media_paths.append(output_filepath) 186 | if response.content_length: 187 | final_size += response.content_length 188 | if error == 1: 189 | # Server Disconnect Error 190 | continue 191 | elif error == 2: 192 | # Resource Not Found Error 193 | break 194 | assert download_item.filename 195 | download_path = download_item.directory.joinpath( 196 | download_item.filename 197 | ) 198 | if authed_drm and download_item.drm: 199 | formatted = self.format_media( 200 | download_path, 201 | decrypted_media_paths, 202 | ) 203 | if not formatted: 204 | pass 205 | final_size = download_path.stat().st_size 206 | timestamp = db_media.created_at.timestamp() 207 | await main_helper.format_file( 208 | download_path, timestamp, self.reformat 209 | ) 210 | if db_media and db_filepath: 211 | if not db_filepath.preview: 212 | db_media.size = download_item.size = final_size 213 | else: 214 | if final_size > db_media.size: 215 | db_media.size = final_size 216 | db_filepath.downloaded = True 217 | break 218 | except asyncio.TimeoutError as _e: 219 | continue 220 | except Exception as _e: 221 | print(_e) 222 | self.bar() 223 | 224 | async def writer( 225 | self, 226 | result: ClientResponse, 227 | download_item: MediaMetadata, 228 | encrypted: bool = True, 229 | ): 230 | async with result as response: 231 | if download_item.drm and encrypted: 232 | download_item = copy.copy(download_item) 233 | download_item = self.reformat_manager.drm_format( 234 | response.url.human_repr(), download_item 235 | ) 236 | assert download_item.directory and download_item.filename 237 | download_path = Path(download_item.directory, download_item.filename) 238 | db_media = copy.copy(download_item.__db_media__) 239 | db_media.directory = download_item.directory 240 | db_media.filename = download_item.filename 241 | download = await self.check(db_media, response) 242 | if not download: 243 | return download_path, None 244 | failed = await self.filesystem_manager.write_data(response, download_path) 245 | return download_path, failed 246 | 247 | async def drm_check_downloaded(self, download_item: MediaMetadata): 248 | download_path = download_item.get_filepath() 249 | if download_path.exists(): 250 | if download_path.stat().st_size and download_item.__db_media__.size: 251 | return True 252 | return False 253 | 254 | async def check(self, download_item: MediaModel, response: ClientResponse): 255 | # Checks if we should download item or not // True | False 256 | filepath = Path(download_item.directory, download_item.filename) 257 | response_status = False 258 | if response.status == 200: 259 | response_status = True 260 | if response.content_length: 261 | download_item.size = response.content_length 262 | 263 | if filepath.exists(): 264 | try: 265 | if filepath.stat().st_size == response.content_length: 266 | return False 267 | else: 268 | return True 269 | except Exception as _e: 270 | pass 271 | else: 272 | if response_status: 273 | # Can produce false positives due to the same reason below 274 | return True 275 | else: 276 | # Reached this point because it probably exists in the folder but under a different content category 277 | pass 278 | 279 | def format_media(self, output_filepath: Path, decrypted_media_paths: list[Path]): 280 | # If you have decrypted video and audio to merge 281 | if len(decrypted_media_paths) > 1: 282 | dec_video_path, dec_audio_path = 
decrypted_media_paths 283 | video_input = ffmpeg.input(dec_video_path) # type:ignore 284 | audio_input = ffmpeg.input(dec_audio_path) # type:ignore 285 | try: 286 | _ffmpeg_output = ffmpeg.output( # type:ignore 287 | video_input, # type:ignore 288 | audio_input, # type:ignore 289 | output_filepath.as_posix(), 290 | vcodec="copy", 291 | acodec="copy", 292 | ).run(capture_stdout=True, capture_stderr=True, overwrite_output=True) 293 | return True 294 | except ffmpeg.Error as _e: 295 | return False 296 | return True 297 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/filesystem_manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import copy 4 | import hashlib 5 | import os 6 | import shutil 7 | from pathlib import Path 8 | from typing import TYPE_CHECKING, Any, Generator, Literal 9 | 10 | import ultima_scraper_api 11 | from aiohttp.client_reqrep import ClientResponse 12 | from ultima_scraper_api.helpers.main_helper import open_partial 13 | from ultima_scraper_api.managers.session_manager import EXCEPTION_TEMPLATE 14 | from ultima_scraper_collection.helpers import main_helper as usc_helper 15 | from ultima_scraper_renamer.reformat import ( 16 | FormatAttributes, 17 | ReformatItem, 18 | ReformatManager, 19 | ) 20 | 21 | if TYPE_CHECKING: 22 | api_types = ultima_scraper_api.api_types 23 | user_types = ultima_scraper_api.user_types 24 | from ultima_scraper_collection import datascraper_types 25 | from ultima_scraper_collection.config import site_config_types 26 | 27 | 28 | class FilesystemManager: 29 | def __init__(self) -> None: 30 | self.user_data_directory = Path("__user_data__") 31 | self.trash_directory = self.user_data_directory.joinpath("trash") 32 | self.profiles_directory = self.user_data_directory.joinpath("profiles") 33 | self.devices_directory = self.user_data_directory.joinpath("drm_device") 34 | self.settings_directory = Path("__user_data__") 35 | self.ignore_files = ["desktop.ini", ".DS_Store", ".DS_store", "@eaDir"] 36 | self.directory_manager: DirectoryManager | None = None 37 | self.directory_manager_users: dict[int, DirectoryManager] = {} 38 | self.file_manager_users: dict[int, FileManager] = {} 39 | 40 | def __iter__(self): 41 | for each in self.__dict__.values(): 42 | yield each 43 | 44 | def check(self): 45 | for directory in self: 46 | if isinstance(directory, Path): 47 | directory.mkdir(exist_ok=True) 48 | 49 | def move(self, src: Path, trg: Path): 50 | shutil.move(src, trg) 51 | 52 | def remove_mandatory_files( 53 | self, files: list[Path] | Generator[Path, None, None], keep: list[str] = [] 54 | ): 55 | folders = [x for x in files if x.name not in self.ignore_files] 56 | if keep: 57 | folders = [x for x in files if x.name in keep] 58 | return folders 59 | 60 | def get_directory_manager(self, user_id: int): 61 | return self.directory_manager_users[user_id] 62 | 63 | def get_file_manager(self, user_id: int): 64 | return self.file_manager_users[user_id] 65 | 66 | def activate_directory_manager(self, site_config: site_config_types): 67 | root_metadata_directory = usc_helper.check_space( 68 | site_config.metadata_setup.directories 69 | ) 70 | root_download_directory = usc_helper.check_space( 71 | site_config.download_setup.directories 72 | ) 73 | self.directory_manager = DirectoryManager( 74 | site_config, 75 | root_metadata_directory, 76 | root_download_directory, 77 | ) 78 | 79 | def trash(self): 80 | pass 81 | 82 | 
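    # The writer below streams each response into a partial file created by
    # open_partial() and promotes it with os.replace() only once the body has
    # been fully consumed, so an interrupted download never leaves a truncated
    # file at the final path. A minimal standalone sketch of that pattern
    # (standard library only; the names are illustrative, not part of this
    # module):
    #
    #     import os
    #     from pathlib import Path
    #     from typing import Iterable
    #
    #     def atomic_write(path: Path, chunks: Iterable[bytes]) -> None:
    #         partial = path.with_suffix(path.suffix + ".part")
    #         with partial.open("wb") as f:
    #             for chunk in chunks:
    #                 f.write(chunk)
    #         os.replace(partial, path)  # atomic within a single filesystem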
async def write_data( 83 | self, response: ClientResponse, download_path: Path, callback: Any = None 84 | ): 85 | status_code = None 86 | if response.status == 200: 87 | total_length = 0 88 | os.makedirs(os.path.dirname(download_path), exist_ok=True) 89 | with open_partial(download_path) as f: 90 | partial_path = f.name 91 | try: 92 | async for data in response.content.iter_chunked(4096): 93 | f.write(data) 94 | length = len(data) 95 | total_length += length 96 | if callback: 97 | callback(length) 98 | except EXCEPTION_TEMPLATE as _e: 99 | status_code = 1 100 | except Exception as _e: 101 | raise Exception(f"Unknown Error: {_e}") 102 | except: 103 | os.unlink(partial_path) 104 | raise 105 | else: 106 | if status_code: 107 | os.unlink(partial_path) 108 | else: 109 | try: 110 | os.replace(partial_path, download_path) 111 | except OSError: 112 | pass 113 | else: 114 | if response.content_length: 115 | pass 116 | # progress_bar.update_total_size(-response.content_length) 117 | status_code = 2 118 | return status_code 119 | 120 | async def create_option( 121 | self, 122 | datascraper: datascraper_types, 123 | username: str, 124 | directory: Path, 125 | format_key: str, 126 | ): 127 | api = datascraper.api 128 | option = { 129 | "site_name": api.site_name, 130 | "profile_username": username, 131 | "model_username": username, 132 | "directory": directory, 133 | } 134 | reformat_item_fd = ReformatItem(option) 135 | assert self.directory_manager 136 | f_d_p = reformat_item_fd.remove_non_unique(self.directory_manager, format_key) 137 | return f_d_p 138 | 139 | async def create_directory_manager( 140 | self, site_config: site_config_types, user: user_types 141 | ): 142 | if self.directory_manager: 143 | final_download_directory = await self.discover_main_directory(user) 144 | final_root_download_directory = ( 145 | self.directory_manager.root_download_directory 146 | ) 147 | for directory in site_config.download_setup.directories: 148 | assert directory.path 149 | if directory.path.as_posix() in final_download_directory.as_posix(): 150 | final_root_download_directory = directory.path 151 | break 152 | directory_manager = DirectoryManager( 153 | site_config, 154 | self.directory_manager.root_metadata_directory, 155 | final_root_download_directory, 156 | ) 157 | self.directory_manager_users[user.id] = directory_manager 158 | self.file_manager_users[user.id] = FileManager(directory_manager) 159 | return directory_manager 160 | 161 | async def discover_main_metadata_directory(self, subscription: user_types): 162 | usernames = subscription.get_usernames(ignore_id=False) 163 | valid_usernames = subscription.get_usernames(ignore_id=True) 164 | authed = subscription.get_authed() 165 | reformat_manager = ReformatManager(authed, self) 166 | directory_manager = self.directory_manager 167 | site_config = directory_manager.site_config 168 | final_store_directory = None 169 | for username in usernames: 170 | for store_directory in [ 171 | x.path for x in site_config.metadata_setup.directories if x.path 172 | ]: 173 | download_directory_reformat_item = ( 174 | reformat_manager.prepare_user_reformat( 175 | subscription, store_directory, username=username 176 | ) 177 | ) 178 | formatted_download_directory = ( 179 | download_directory_reformat_item.reformat( 180 | site_config.metadata_setup.directory_format 181 | ) 182 | ) 183 | final_store_directory = formatted_download_directory 184 | if final_store_directory.exists(): 185 | if username == f"u{subscription.id}": 186 | if valid_usernames: 187 | 
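                        # The directory on disk was created under the id-based
                        # placeholder username ("u<id>"). Now that a real username
                        # is known, rebuild the path with the newest valid username
                        # and rename the old directory to it.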
download_directory_reformat_item = ( 188 | reformat_manager.prepare_user_reformat( 189 | subscription, 190 | store_directory, 191 | username=valid_usernames[-1], 192 | ) 193 | ) 194 | formatted_download_directory = ( 195 | download_directory_reformat_item.reformat( 196 | site_config.metadata_setup.directory_format 197 | ) 198 | ) 199 | if not formatted_download_directory.exists(): 200 | formatted_download_directory.mkdir( 201 | exist_ok=True, parents=True 202 | ) 203 | final_store_directory.rename( 204 | formatted_download_directory 205 | ) 206 | final_store_directory = formatted_download_directory 207 | else: 208 | final_store_directory = formatted_download_directory 209 | return final_store_directory 210 | else: 211 | return final_store_directory 212 | return final_store_directory 213 | 214 | async def discover_main_directory(self, subscription: user_types): 215 | usernames = subscription.get_usernames(ignore_id=False) 216 | if f"u{subscription.id}" not in usernames: 217 | usernames.append(f"u{subscription.id}") 218 | valid_usernames = subscription.get_usernames(ignore_id=True) 219 | authed = subscription.get_authed() 220 | reformat_manager = ReformatManager(authed, self) 221 | directory_manager = self.directory_manager 222 | assert directory_manager 223 | site_config = directory_manager.site_config 224 | store_directories = [ 225 | x.path for x in site_config.download_setup.directories if x.path 226 | ] 227 | 228 | for username in usernames: 229 | for store_directory in store_directories: 230 | download_directory_reformat_item = ( 231 | reformat_manager.prepare_user_reformat( 232 | subscription, store_directory, username=username 233 | ) 234 | ) 235 | formatted_download_directory = ( 236 | download_directory_reformat_item.remove_non_unique( 237 | directory_manager, "file_directory_format" 238 | ) 239 | ) 240 | 241 | if formatted_download_directory.exists(): 242 | if username == f"u{subscription.id}" and valid_usernames: 243 | download_directory_reformat_item = ( 244 | reformat_manager.prepare_user_reformat( 245 | subscription, 246 | store_directory, 247 | username=valid_usernames[-1], 248 | ) 249 | ) 250 | new_formatted_download_directory = ( 251 | download_directory_reformat_item.remove_non_unique( 252 | directory_manager, "file_directory_format" 253 | ) 254 | ) 255 | if not new_formatted_download_directory.exists(): 256 | # formatted_download_directory.mkdir( 257 | # exist_ok=True, parents=True 258 | # ) 259 | formatted_download_directory.rename( 260 | new_formatted_download_directory 261 | ) 262 | formatted_download_directory = ( 263 | new_formatted_download_directory 264 | ) 265 | return formatted_download_directory 266 | return formatted_download_directory 267 | 268 | download_directory_reformat_item = reformat_manager.prepare_user_reformat( 269 | subscription, directory_manager.root_download_directory, username=username 270 | ) 271 | formatted_download_directory = ( 272 | download_directory_reformat_item.remove_non_unique( 273 | directory_manager, "file_directory_format" 274 | ) 275 | ) 276 | return formatted_download_directory 277 | 278 | async def discover_alternative_directories(self, subscription: user_types): 279 | usernames = subscription.get_usernames(ignore_id=False) 280 | authed = subscription.get_authed() 281 | reformat_manager = ReformatManager(authed, self) 282 | directory_manager = self.get_directory_manager(subscription.id) 283 | site_config = directory_manager.site_config 284 | for username in usernames: 285 | for alt_download_directory in [ 286 | x.path for x in 
site_config.download_setup.directories if x.path 287 | ]: 288 | alt_download_directory_reformat_item = ( 289 | reformat_manager.prepare_user_reformat( 290 | subscription, alt_download_directory, username=username 291 | ) 292 | ) 293 | formatted_alt_download_directory = ( 294 | alt_download_directory_reformat_item.remove_non_unique( 295 | directory_manager, "file_directory_format" 296 | ) 297 | ) 298 | if ( 299 | formatted_alt_download_directory 300 | == directory_manager.user.download_directory 301 | ): 302 | continue 303 | if formatted_alt_download_directory.exists(): 304 | directory_manager.user.alt_download_directories.append( 305 | formatted_alt_download_directory 306 | ) 307 | return directory_manager.user.alt_download_directories 308 | 309 | async def format_directories(self, performer: user_types) -> DirectoryManager: 310 | authed = performer.get_authed() 311 | directory_manager = self.get_directory_manager(performer.id) 312 | file_manager = self.get_file_manager(performer.id) 313 | 314 | final_metadata_directory = await self.discover_main_metadata_directory( 315 | performer 316 | ) 317 | directory_manager.user.metadata_directory = final_metadata_directory 318 | 319 | final_download_directory = await self.discover_main_directory(performer) 320 | directory_manager.user.download_directory = final_download_directory 321 | 322 | api = authed.api 323 | performer_username = performer.get_usernames(ignore_id=True)[-1] 324 | site_name = authed.api.site_name 325 | alt_directories = await self.discover_alternative_directories(performer) 326 | await file_manager.set_default_files() 327 | _metadata_filepaths = await file_manager.find_metadata_files(legacy_files=False) 328 | # for metadata_filepath in metadata_filepaths: 329 | # if file_manager.directory_manager.user.metadata_directory.as_posix() in metadata_filepath.parent.as_posix(): 330 | # continue 331 | # new_filepath = file_manager.directory_manager.user.metadata_directory.joinpath(metadata_filepath.name) 332 | # if new_filepath.exists(): 333 | # new_filepath = usc_helper.find_unused_filename( 334 | # new_filepath 335 | # ) 336 | # if new_filepath.exists(): 337 | # breakpoint() 338 | # file_manager.rename_path(metadata_filepath, new_filepath) 339 | # pass 340 | # alt_files = await usc_helper.walk( 341 | # file_manager.directory_manager.user.download_directory 342 | # ) 343 | # if not alt_files: 344 | # shutil.rmtree(file_manager.directory_manager.user.download_directory) 345 | await file_manager.merge_alternative_directories(alt_directories) 346 | user_metadata_directory = directory_manager.user.metadata_directory 347 | assert user_metadata_directory 348 | _user_download_directory = directory_manager.user.download_directory 349 | legacy_metadata_directory = user_metadata_directory 350 | directory_manager.user.legacy_metadata_directories.append( 351 | legacy_metadata_directory 352 | ) 353 | items = api.CategorizedContent() 354 | for api_type, _ in items: 355 | legacy_metadata_directory_2 = user_metadata_directory.joinpath(api_type) 356 | directory_manager.user.legacy_metadata_directories.append( 357 | legacy_metadata_directory_2 358 | ) 359 | legacy_model_directory = directory_manager.root_download_directory.joinpath( 360 | site_name, performer_username 361 | ) 362 | directory_manager.user.legacy_download_directories.append( 363 | legacy_model_directory 364 | ) 365 | return directory_manager 366 | 367 | 368 | class DirectoryManager: 369 | def __init__( 370 | self, 371 | site_config: site_config_types, 372 | root_metadata_directory: Path, 
373 | root_download_directory: Path, 374 | ) -> None: 375 | self.root_directory = Path() 376 | self.root_metadata_directory = Path(root_metadata_directory) 377 | self.root_download_directory = Path(root_download_directory) 378 | self.user = self.UserDirectories() 379 | self.site_config = site_config 380 | formats = FormatTypes(site_config) 381 | string, status = formats.check_rules() 382 | if not status: 383 | print(string) 384 | exit(0) 385 | self.formats = formats 386 | pass 387 | 388 | def create_directories(self): 389 | # self.profile.create_directories() 390 | self.root_metadata_directory.mkdir(exist_ok=True) 391 | self.root_download_directory.mkdir(exist_ok=True) 392 | 393 | def delete_empty_directories( 394 | self, directory: Path, filesystem_manager: FilesystemManager 395 | ): 396 | for root, dirnames, _files in os.walk(directory, topdown=False): 397 | for dirname in dirnames: 398 | full_path = os.path.realpath(os.path.join(root, dirname)) 399 | contents = os.listdir(full_path) 400 | if not contents: 401 | shutil.rmtree(full_path, ignore_errors=True) 402 | else: 403 | content_paths = [Path(full_path, content) for content in contents] 404 | contents = filesystem_manager.remove_mandatory_files(content_paths) 405 | if not contents: 406 | shutil.rmtree(full_path, ignore_errors=True) 407 | 408 | if os.path.exists(directory) and not os.listdir(directory): 409 | os.rmdir(directory) 410 | 411 | # class ProfileDirectories: 412 | # def __init__(self, root_directory: Path) -> None: 413 | # self.root_directory = Path(root_directory) 414 | # self.metadata_directory = self.root_directory.joinpath("Metadata") 415 | # def create_directories(self): 416 | # self.root_directory.mkdir(exist_ok=True) 417 | 418 | class UserDirectories: 419 | def __init__(self) -> None: 420 | self.metadata_directory: Path | None = None 421 | self.download_directory: Path | None = None 422 | self.alt_download_directories: list[Path] = [] 423 | self.legacy_download_directories: list[Path] = [] 424 | self.legacy_metadata_directories: list[Path] = [] 425 | 426 | def find_legacy_directory( 427 | self, 428 | directory_type: Literal["metadata", "download"] = "metadata", 429 | api_type: str = "", 430 | ): 431 | match directory_type: 432 | case "metadata": 433 | directories = self.legacy_metadata_directories 434 | case _: 435 | directories = self.legacy_download_directories 436 | final_directory = directories[0] 437 | for directory in directories: 438 | for part in directory.parts: 439 | if api_type in part: 440 | return directory 441 | return final_directory 442 | 443 | async def walk(self, directory: Path): 444 | return await usc_helper.walk(directory) 445 | 446 | 447 | class FileManager: 448 | def __init__(self, directory_manager: DirectoryManager) -> None: 449 | self.files: list[Path] = [] 450 | self.directory_manager = directory_manager 451 | 452 | async def set_default_files( 453 | self, 454 | ): 455 | assert self.directory_manager.user.metadata_directory 456 | assert self.directory_manager.user.download_directory 457 | await self.update_files(self.directory_manager.user.metadata_directory) 458 | await self.update_files(self.directory_manager.user.download_directory) 459 | 460 | async def refresh_files(self): 461 | return await self.set_default_files() 462 | 463 | async def update_files(self, directory: Path): 464 | directory_manager = self.directory_manager 465 | files = await directory_manager.walk(directory) 466 | self.files.extend(files) 467 | return files 468 | 469 | def add_file(self, filepath: Path): 470 | 
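        # FileManager keeps an in-memory index (self.files) of every path found
        # while walking the user's metadata and download directories; add_file,
        # remove_file, rename_path and delete_path below keep that index in
        # sync with the corresponding filesystem operations.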
self.files.append(filepath) 471 | return True 472 | 473 | def remove_file(self, filepath: Path): 474 | if filepath in self.files: 475 | self.files.remove(filepath) 476 | return True 477 | return False 478 | 479 | def rename_path(self, old_filepath: Path, new_filepath: Path): 480 | self.remove_file(old_filepath) 481 | self.add_file(new_filepath) 482 | new_filepath.parent.mkdir(exist_ok=True, parents=True) 483 | shutil.move(old_filepath, new_filepath) 484 | return True 485 | 486 | def delete_path(self, filepath: Path): 487 | if filepath.is_dir(): 488 | filepath.rmdir() 489 | else: 490 | self.remove_file(filepath) 491 | filepath.unlink(missing_ok=True) 492 | return True 493 | 494 | async def cleanup(self): 495 | unique: set[Path] = set() 496 | await self.refresh_files() 497 | for valid_file in self.find_string_in_path("__drm__"): 498 | self.delete_path(valid_file) 499 | unique.add(valid_file.parent) 500 | for unique_file in unique: 501 | self.delete_path(unique_file) 502 | return True 503 | 504 | def find_string_in_path(self, string: str): 505 | valid_files: list[Path] = [] 506 | for file in self.files: 507 | if string in file.as_posix(): 508 | valid_files.append(file) 509 | return valid_files 510 | 511 | async def find_metadata_files(self, legacy_files: bool = True): 512 | new_list: list[Path] = [] 513 | for filepath in self.files: 514 | if not legacy_files: 515 | if "__legacy_metadata__" in filepath.parts: 516 | continue 517 | match filepath.suffix: 518 | case ".db": 519 | red_list = ["thumbs.db"] 520 | status = [x for x in red_list if x == filepath.name.lower()] 521 | if status: 522 | continue 523 | new_list.append(filepath) 524 | case ".json": 525 | new_list.append(filepath) 526 | case _: 527 | pass 528 | return new_list 529 | 530 | async def merge_alternative_directories(self, alt_directories: list[Path]): 531 | directory_manager = self.directory_manager 532 | assert directory_manager.user.download_directory 533 | for alt_download_directory in alt_directories: 534 | alt_files = await directory_manager.walk(alt_download_directory) 535 | for alt_file in alt_files: 536 | new_filepath = Path( 537 | alt_file.as_posix().replace( 538 | alt_download_directory.as_posix(), 539 | directory_manager.user.download_directory.as_posix(), 540 | ) 541 | ) 542 | 543 | if alt_file.suffix in [".json", ".db"]: 544 | if new_filepath.exists(): 545 | new_filepath = usc_helper.find_unused_filename(new_filepath) 546 | if new_filepath.exists(): 547 | breakpoint() 548 | if new_filepath.exists(): 549 | old_checksum = hashlib.md5(alt_file.read_bytes()).hexdigest() 550 | new_checksum = hashlib.md5(new_filepath.read_bytes()).hexdigest() 551 | if old_checksum == new_checksum: 552 | self.delete_path(alt_file) 553 | else: 554 | old_size = alt_file.stat().st_size 555 | new_size = new_filepath.stat().st_size 556 | if old_size > new_size: 557 | self.rename_path(alt_file, new_filepath) 558 | elif new_size > old_size: 559 | self.delete_path(alt_file) 560 | elif old_size == new_size: 561 | if usc_helper.is_image_valid(new_filepath): 562 | self.delete_path(alt_file) 563 | elif usc_helper.is_image_valid(alt_file): 564 | self.rename_path(alt_file, new_filepath) 565 | else: 566 | self.rename_path(alt_file, new_filepath) 567 | 568 | alt_files = await usc_helper.walk(alt_download_directory) 569 | if not alt_files: 570 | shutil.rmtree(alt_download_directory) 571 | 572 | 573 | class FormatTypes: 574 | def __init__(self, site_settings: site_config_types) -> None: 575 | self.metadata_directory_format = 
site_settings.metadata_setup.directory_format
576 |         self.file_directory_format = site_settings.download_setup.directory_format
577 |         self.filename_format = site_settings.download_setup.filename_format
578 | 
579 |     def check_rules(self):
580 |         """Checks the configured formats for invalid placeholders.
581 | 
582 |         Returns:
583 |             tuple[str, bool]: A message explaining any invalid format (empty if all are valid) and a validity flag.
584 |         """
585 |         bool_status = True
586 |         wl = []
587 |         invalid_list = []
588 |         string = ""
589 |         for key, _value in self:
590 |             if key == "file_directory_format":
591 |                 bl = FormatAttributes()
592 |                 wl = [v for _k, v in bl.__dict__.items()]
593 |                 bl = bl.whitelist(wl)
594 |                 invalid_list = []
595 |                 for b in bl:
596 |                     if b in self.file_directory_format.as_posix():
597 |                         invalid_list.append(b)
598 |             if key == "filename_format":
599 |                 bl = FormatAttributes()
600 |                 wl = [v for _k, v in bl.__dict__.items()]
601 |                 bl = bl.whitelist(wl)
602 |                 invalid_list = []
603 |                 for b in bl:
604 |                     if b in self.filename_format.as_posix():
605 |                         invalid_list.append(b)
606 |             if key == "metadata_directory_format":
607 |                 wl = [
608 |                     "{site_name}",
609 |                     "{first_letter}",
610 |                     "{model_id}",
611 |                     "{profile_username}",
612 |                     "{model_username}",
613 |                 ]
614 |                 bl = FormatAttributes().whitelist(wl)
615 |                 invalid_list: list[str] = []
616 |                 for b in bl:
617 |                     if b in self.metadata_directory_format.as_posix():
618 |                         invalid_list.append(b)
619 |             if invalid_list:
620 |                 string += f"You cannot use {','.join(invalid_list)} in {key}. Use any from this list {','.join(wl)}"
621 |                 bool_status = False
622 | 
623 |         return string, bool_status
624 | 
625 |     def check_unique(self):
626 |         values: list[str] = []
627 |         unique = []
628 |         new_format_copied = copy.deepcopy(self)
629 |         option: dict[str, Any] = {}
630 |         option["string"] = ""
631 |         option["bool_status"] = True
632 |         option["unique"] = new_format_copied
633 |         f = FormatAttributes()
634 |         for key, value in self:
635 |             value: Path
636 |             if key == "file_directory_format":
637 |                 unique = ["{media_id}", "{model_username}"]
638 |                 values = list(value.parts)
639 |                 option["unique"].file_directory_format = unique
640 |             elif key == "filename_format":
641 |                 values = []
642 |                 unique = ["{media_id}", "{filename}"]
643 |                 for _key2, value2 in f:
644 |                     if value2 in value.as_posix():
645 |                         values.append(value2)
646 |                 option["unique"].filename_format = unique
647 |             elif key == "metadata_directory_format":
648 |                 unique = ["{model_username}"]
649 |                 values = list(value.parts)
650 |                 option["unique"].metadata_directory_format = unique
651 |             if key != "filename_format":
652 |                 e = [x for x in values if x in unique]
653 |             else:
654 |                 e = [x for x in unique if x in values]
655 |             if e:
656 |                 setattr(option["unique"], key, e)
657 |             else:
658 |                 option[
659 |                     "string"
660 |                 ] += f"{key} is an invalid format since it has no unique identifiers. 
Use any from this list {','.join(unique)}\n" 661 | option["bool_status"] = False 662 | return option 663 | 664 | def __iter__(self): 665 | for attr, value in self.__dict__.items(): 666 | yield attr, value 667 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/metadata_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/managers/metadata_manager/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/option_manager.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from ultima_scraper_collection.config import auto_types 3 | 4 | 5 | class OptionManager: 6 | def __init__(self) -> None: 7 | self.performer_options: OptionsFormat | None = None 8 | self.subscription_options: OptionsFormat | None = None 9 | pass 10 | 11 | async def create_option( 12 | self, 13 | items: list[Any], 14 | category: str, 15 | auto_choice: auto_types = False, 16 | ): 17 | option = await OptionsFormat(items, category, auto_choice).formatter() 18 | return option 19 | 20 | 21 | class OptionsFormat: 22 | def __init__( 23 | self, 24 | items: list[Any], 25 | options_type: str, 26 | auto_choice: auto_types = False, 27 | ) -> None: 28 | self.items = items 29 | self.item_keys: list[str] = [] 30 | self.string = "" 31 | self.options_type = options_type 32 | self.auto_choice = auto_choice 33 | self.final_choices = [] 34 | 35 | async def formatter(self): 36 | options_type = self.options_type 37 | final_string = f"Choose {options_type.capitalize()}: 0 = All" 38 | auto_choice = self.auto_choice 39 | if type(auto_choice) == int: 40 | auto_choice = str(auto_choice) 41 | 42 | if isinstance(auto_choice, str): 43 | auto_choice = [x for x in auto_choice.split(",") if x] 44 | auto_choice = ( 45 | True if any(x in ["0", "all"] for x in auto_choice) else auto_choice 46 | ) 47 | 48 | if isinstance(auto_choice, list): 49 | auto_choice = [x for x in auto_choice if x] 50 | self.auto_choice = auto_choice 51 | 52 | match options_type: 53 | case "sites": 54 | self.item_keys = self.items 55 | my_string = " | ".join( 56 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 57 | ) 58 | final_string = f"{final_string} | {my_string}" 59 | self.string = final_string 60 | final_list = await self.choose_option() 61 | self.final_choices = [ 62 | key 63 | for choice in final_list 64 | for key in self.items 65 | if choice.lower() == key.lower() 66 | ] 67 | case "profiles": 68 | self.item_keys = [x.get_auth_details().username for x in self.items] 69 | my_string = " | ".join( 70 | map( 71 | lambda x: f"{self.items.index(x)+1} = {x.get_auth_details().username}", 72 | self.items, 73 | ) 74 | ) 75 | final_string = f"{final_string} | {my_string}" 76 | self.string = final_string 77 | final_list = await self.choose_option() 78 | self.final_choices = [ 79 | key 80 | for choice in final_list 81 | for key in self.items 82 | if choice.lower() == key.get_auth_details().username.lower() 83 | ] 84 | set1 = set(self.final_choices) 85 | set2 = set(self.items) 86 | difference = list(set2 - set1) 87 | for auth in difference: 88 | await auth.session_manager.active_session.close() 89 | case "subscriptions": 90 | subscription_users = [x for x in self.items] 91 | self.item_keys = 
[x.username for x in subscription_users] 92 | my_string = " | ".join( 93 | map( 94 | lambda x: f"{subscription_users.index(x)+1} = {x.username}", 95 | subscription_users, 96 | ) 97 | ) 98 | final_string = f"{final_string} | {my_string}" 99 | self.string = final_string 100 | final_list = await self.choose_option() 101 | self.final_choices = [ 102 | key 103 | for choice in final_list 104 | for key in subscription_users 105 | if choice.lower() == key.username.lower() 106 | ] 107 | 108 | case "contents": 109 | self.item_keys = self.items 110 | my_string = " | ".join( 111 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 112 | ) 113 | final_string = f"{final_string} | {my_string}" 114 | self.string = final_string 115 | final_list = await self.choose_option() 116 | self.final_choices = [ 117 | key 118 | for choice in final_list 119 | for key in self.items 120 | if choice.lower() == key.lower() 121 | ] 122 | case "medias": 123 | self.item_keys = self.items 124 | my_string = " | ".join( 125 | map(lambda x: f"{self.items.index(x)+1} = {x}", self.items) 126 | ) 127 | final_string = f"{final_string} | {my_string}" 128 | self.string = final_string 129 | final_list = await self.choose_option() 130 | self.final_choices = [ 131 | key 132 | for choice in final_list 133 | for key in self.items 134 | if choice.lower() == key.lower() 135 | ] 136 | case _: 137 | final_list = [] 138 | return self 139 | 140 | async def choose_option(self): 141 | def process_option(input_values: list[str]): 142 | input_list_2: list[str] = [] 143 | for input_value in input_values: 144 | if input_value.isdigit(): 145 | try: 146 | input_list_2.append(self.item_keys[int(input_value) - 1]) 147 | except IndexError: 148 | continue 149 | else: 150 | x = [x for x in self.item_keys if x.lower() == input_value.lower()] 151 | input_list_2.extend(x) 152 | return input_list_2 153 | 154 | input_list: list[str] = [x.lower() for x in self.item_keys] 155 | final_list: list[str] = [] 156 | if self.auto_choice: 157 | if not self.scrape_all(): 158 | if isinstance(self.auto_choice, list): 159 | input_values = [str(x).lower() for x in self.auto_choice] 160 | input_list = process_option(input_values) 161 | else: 162 | print(self.string) 163 | input_value = input().lower() 164 | if input_value != "0" and input_value != "all": 165 | input_values = input_value.split(",") 166 | input_list = process_option(input_values) 167 | final_list = input_list 168 | return final_list 169 | 170 | def scrape_all(self): 171 | status = False 172 | if ( 173 | self.auto_choice == True 174 | or isinstance(self.auto_choice, list) 175 | and isinstance(self.auto_choice[0], str) 176 | and ( 177 | self.auto_choice[0].lower() == "all" 178 | or self.auto_choice[0].lower() == "0" 179 | ) 180 | ): 181 | status = True 182 | return status 183 | 184 | def return_auto_choice(self): 185 | identifiers: list[int | str] | int | str | bool = [] 186 | if isinstance(self.auto_choice, list): 187 | identifiers = [x for x in self.auto_choice if not isinstance(x, bool)] 188 | return identifiers 189 | -------------------------------------------------------------------------------- /ultima_scraper_collection/managers/server_manager.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from typing import Sequence 3 | 4 | import netifaces 5 | from sqlalchemy import select 6 | from ultima_scraper_db.databases.ultima_archive.database_api import ArchiveAPI 7 | from ultima_scraper_db.databases.ultima_archive.schemas.management import ( 8 | 
ServerModel, 9 | SiteModel, 10 | ) 11 | from ultima_scraper_db.managers.database_manager import Database, Schema 12 | 13 | 14 | class ServerManager: 15 | def __init__(self, ultima_archive_db_api: ArchiveAPI) -> None: 16 | self.ultima_archive_db_api = ultima_archive_db_api 17 | 18 | async def init(self, database: Database): 19 | def create_socket(socket_type: socket.SocketKind = socket.SOCK_DGRAM): 20 | temp_socket = socket.socket(socket.AF_INET, socket_type) 21 | temp_socket.connect(("8.8.8.8", 80)) # Connecting to Google's DNS server 22 | return temp_socket 23 | 24 | def get_local_ip(): 25 | # Create a temporary connection to a remote server to retrieve the local IP address 26 | temp_socket = create_socket() 27 | local_ip = temp_socket.getsockname()[0] 28 | temp_socket.close() 29 | return local_ip 30 | 31 | def mac_for_ip(ip: str) -> str | None: 32 | "Returns a list of MACs for interfaces that have given IP, returns None if not found" 33 | for i in netifaces.interfaces(): # type: ignore 34 | addrs = netifaces.ifaddresses(i) # type: ignore 35 | try: 36 | if_mac: str | None = addrs[netifaces.AF_LINK][0]["addr"] # type: ignore 37 | if_ip: str | None = addrs[netifaces.AF_INET][0]["addr"] # type: ignore 38 | except (IndexError, KeyError): # ignore ifaces that dont have MAC or IP 39 | if_mac = if_ip = None 40 | if if_ip == ip: 41 | return if_mac # type: ignore 42 | return None 43 | 44 | async with self.ultima_archive_db_api.create_management_api() as management_api: 45 | session = management_api.get_session() 46 | db_sites = await session.scalars(select(SiteModel)) 47 | db_sites = db_sites.all() 48 | self.reset = False 49 | if not db_sites: 50 | # Need to add a create or update for additional sites 51 | from ultima_scraper_db.databases.ultima_archive.schemas.management import ( 52 | default_sites, 53 | ) 54 | 55 | for site in default_sites: 56 | session.add(site) 57 | await session.commit() 58 | db_sites = await session.scalars(select(SiteModel)) 59 | db_sites = db_sites.all() 60 | self.reset = True 61 | private_ip = get_local_ip() 62 | mac_address = mac_for_ip(private_ip) 63 | # public_ip = requests.get("https://checkip.amazonaws.com/").text.strip() 64 | self.db_sites: Sequence[SiteModel] = db_sites 65 | self.ip_address = private_ip 66 | 67 | db_servers = await session.scalars(select(ServerModel)) 68 | db_servers = db_servers.all() 69 | if not db_servers: 70 | default_server = ServerModel( 71 | name="home", ip=self.ip_address, mac_address=mac_address 72 | ) 73 | session.add(default_server) 74 | await session.commit() 75 | active_server = await session.scalars( 76 | select(ServerModel).where( 77 | (ServerModel.ip == self.ip_address) 78 | & (ServerModel.mac_address == mac_address) 79 | ) 80 | ) 81 | self.active_server = active_server.one() 82 | self.site_schemas: dict[str, Schema] = {} 83 | for db_site in self.db_sites: 84 | site_schema_api = self.ultima_archive_db_api.get_site_api( 85 | db_site.db_name 86 | ) 87 | self.site_schemas[site_schema_api.schema.name] = site_schema_api.schema 88 | return self 89 | 90 | async def resolve_site_schema(self, value: str): 91 | return self.site_schemas[value] 92 | 93 | async def resolve_db_site(self, value: str): 94 | return [x for x in self.db_sites if x.db_name == value][0] 95 | 96 | async def find_site_api(self, name: str): 97 | return self.ultima_archive_db_api.site_apis[name] 98 | 99 | def get_server_id(self): 100 | return self.active_server.id 101 | -------------------------------------------------------------------------------- 
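A minimal usage sketch for the ServerManager above (illustrative only; how the
application obtains its ArchiveAPI and Database handles is assumed here via the
hypothetical helpers `bootstrap_archive_api()` and `bootstrap_database()`):

    import asyncio

    async def main() -> None:
        archive_api = await bootstrap_archive_api()  # hypothetical helper
        database = await bootstrap_database()        # hypothetical helper
        server_manager = await ServerManager(archive_api).init(database)
        # init() seeds the default sites on first run, registers this host by
        # private IP + MAC address, and caches one Schema per site, so:
        print(server_manager.get_server_id())

    asyncio.run(main())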
/ultima_scraper_collection/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/modules/__init__.py -------------------------------------------------------------------------------- /ultima_scraper_collection/modules/module_streamliner.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import copy 5 | from typing import Any 6 | 7 | import ultima_scraper_api 8 | from sqlalchemy import and_, or_, select 9 | from sqlalchemy.orm import joinedload 10 | from ultima_scraper_api.apis.onlyfans.classes.auth_model import OnlyFansAuthModel 11 | from ultima_scraper_api.apis.onlyfans.classes.user_model import ( 12 | create_user as OnlyFansUserModel, 13 | ) 14 | from ultima_scraper_api.helpers.main_helper import ProgressBar 15 | from ultima_scraper_collection.config import site_config_types 16 | from ultima_scraper_collection.managers.content_manager import ( 17 | ContentManager, 18 | MediaManager, 19 | ) 20 | from ultima_scraper_collection.managers.download_manager import DownloadManager 21 | from ultima_scraper_collection.managers.filesystem_manager import FilesystemManager 22 | from ultima_scraper_collection.managers.metadata_manager.metadata_manager import ( 23 | MediaMetadata, 24 | MetadataManager, 25 | ) 26 | from ultima_scraper_collection.managers.server_manager import ServerManager 27 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 28 | FilePathModel as DBFilePathModel, 29 | ) 30 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 31 | MediaModel as DBMediaModel, 32 | ) 33 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 34 | MessageModel as DBMessageModel, 35 | ) 36 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import UserModel 37 | from ultima_scraper_db.databases.ultima_archive.schemas.templates.site import ( 38 | UserModel as DBUserModel, 39 | ) 40 | from ultima_scraper_renamer.reformat import ReformatManager 41 | 42 | auth_types = ultima_scraper_api.auth_types 43 | user_types = ultima_scraper_api.user_types 44 | message_types = ultima_scraper_api.message_types 45 | error_types = ultima_scraper_api.error_types 46 | subscription_types = ultima_scraper_api.subscription_types 47 | from typing import TYPE_CHECKING 48 | 49 | if TYPE_CHECKING: 50 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.fansly import ( 51 | FanslyDataScraper, 52 | ) 53 | from ultima_scraper_collection.managers.datascraper_manager.datascrapers.onlyfans import ( 54 | OnlyFansDataScraper, 55 | ) 56 | 57 | datascraper_types = OnlyFansDataScraper | FanslyDataScraper 58 | 59 | 60 | async def find_earliest_non_downloaded_message( 61 | user: user_types, datascraper: "datascraper_types" 62 | ): 63 | authed = user.get_authed() 64 | site_api = datascraper.server_manager.ultima_archive_db_api.get_site_api( 65 | authed.get_api().site_name 66 | ) 67 | earliest_non_downloaded_message = None 68 | found_media = False 69 | db_performer = datascraper.db_performers[user.id] 70 | db_content_manager = db_performer.content_manager 71 | assert db_content_manager, "Content manager not found" 72 | db_messages = db_content_manager.messages 73 | for db_message in db_messages: 74 | if (db_message.user_id == authed.id and 
db_message.receiver_id == user.id) or ( 75 | db_message.user_id == user.id and db_message.receiver_id == authed.id 76 | ): 77 | all_media_downloaded = False 78 | if db_message.media: 79 | found_media = True 80 | else: 81 | continue 82 | for media in db_message.media: 83 | for filepath in media.filepaths: 84 | if not filepath.message: 85 | continue 86 | if filepath.downloaded: 87 | all_media_downloaded = True 88 | break 89 | 90 | if not all_media_downloaded: 91 | if ( 92 | earliest_non_downloaded_message is None 93 | or db_message.created_at 94 | < earliest_non_downloaded_message.created_at 95 | ): 96 | earliest_non_downloaded_message = db_message 97 | if found_media: 98 | earliest_non_downloaded_message = db_messages[0] 99 | await site_api.schema.session.commit() 100 | return earliest_non_downloaded_message 101 | 102 | 103 | class StreamlinedDatascraper: 104 | def __init__( 105 | self, datascraper: datascraper_types, server_manager: ServerManager 106 | ) -> None: 107 | self.datascraper = datascraper 108 | self.filesystem_manager = FilesystemManager() 109 | self.media_types = self.datascraper.api.MediaTypes() 110 | self.user_list: set[user_types] = set() 111 | self.db_performers: dict[int, UserModel] = {} 112 | self.metadata_manager_users: dict[int, MetadataManager] = {} 113 | self.server_manager: ServerManager = server_manager 114 | self.content_managers: dict[int, ContentManager] = {} 115 | self.media_managers: dict[int, MediaManager] = {} 116 | 117 | def find_metadata_manager(self, user_id: int): 118 | return self.metadata_manager_users[user_id] 119 | 120 | def resolve_content_manager(self, user: user_types): 121 | content_manager = self.content_managers.get(user.id) 122 | authed = user.get_authed() 123 | if content_manager: 124 | if content_manager.authed.id != authed.id: 125 | content_manager = ContentManager(authed) 126 | else: 127 | content_manager = ContentManager(authed) 128 | self.content_managers[user.id] = content_manager 129 | return content_manager 130 | 131 | def create_media_manager(self, user: user_types): 132 | if user.id not in self.media_managers: 133 | self.media_managers[user.id] = MediaManager() 134 | return self.media_managers[user.id] 135 | 136 | def get_archive_db_api(self): 137 | return self.server_manager.ultima_archive_db_api 138 | 139 | async def configure_datascraper_jobs(self): 140 | api = self.datascraper.api 141 | site_config = self.datascraper.site_config 142 | available_jobs = site_config.jobs.scrape 143 | option_manager = self.datascraper.option_manager 144 | performer_options = option_manager.performer_options 145 | assert option_manager.subscription_options, "Subscription options not found" 146 | valid_user_list: set[user_types] = set( 147 | option_manager.subscription_options.final_choices 148 | ) 149 | scraping_subscriptions = site_config.jobs.scrape.subscriptions 150 | identifiers = [] 151 | if performer_options: 152 | identifiers = performer_options.return_auto_choice() 153 | if not available_jobs.subscriptions: 154 | for authed in api.auths.values(): 155 | authed.subscriptions = [] 156 | if available_jobs.messages: 157 | chat_users: list[user_types] = [] 158 | if identifiers: 159 | for authed in api.auths.values(): 160 | for identifier in identifiers: 161 | chat_user = await authed.get_user(identifier) 162 | if isinstance(chat_user, user_types): 163 | chat_users.append(chat_user) 164 | else: 165 | chat_users = await self.get_chat_users() 166 | [ 167 | user.scrape_whitelist.append("Messages") 168 | for user in chat_users 169 | if not 

        if available_jobs.paid_contents:
            for authed in self.datascraper.api.auths.values():
                paid_contents = await authed.get_paid_content()
                if not isinstance(paid_contents, error_types):
                    for paid_content in paid_contents:
                        author = paid_content.get_author()
                        if identifiers:
                            found = await author.match_identifiers(identifiers)
                            if not found:
                                continue
                        if author:
                            performer = authed.find_user(
                                identifier=author.id,
                            )
                            if performer:
                                performer.job_whitelist.append("PaidContents")
                                performer.scrape_whitelist.clear()
                                valid_user_list.add(performer)
        from ultima_scraper_api.apis.fansly.classes.user_model import (
            create_user as FYUserModel,
        )

        for user in valid_user_list:
            if isinstance(user, FYUserModel) and user.following:
                user.scrape_whitelist.clear()

        # Need to filter out own profile with is_performer, etc.
        final_valid_user_set = {
            user
            for user in valid_user_list
            if user.username not in user.get_authed().blacklist
        }

        self.user_list = final_valid_user_set
        return final_valid_user_set

    # Prepares the API links to be scraped
    async def scrape_vault(
        self, user: user_types, db_user: UserModel, content_type: str
    ):
        current_job = user.get_current_job()
        if not current_job:
            return
        authed: auth_types = user.get_authed()
        site_config = self.datascraper.site_config
        if (
            isinstance(authed, OnlyFansAuthModel)
            and user.is_authed_user()
            and user.is_performer()
        ):
            vault = await authed.get_vault_lists()
            vault_item = vault.resolve(name=content_type)
            assert vault_item, f"Vault item {content_type} not found"
            vault_item_medias = await vault_item.get_medias()
            media_metadatas: list[MediaMetadata] = []
            content_manager = self.resolve_content_manager(user)
            reformat_manager = ReformatManager(authed, self.filesystem_manager)
            for vault_item_media in vault_item_medias:
                media_metadata = MediaMetadata(
                    vault_item_media["id"],
                    vault_item_media["type"],
                    content_manager=content_manager,
                )
                media_metadata.raw_extractor(user, vault_item_media)
                reformat_item = reformat_manager.prepare_reformat(media_metadata)
                file_directory = reformat_item.reformat(
                    site_config.download_setup.directory_format
                )
                reformat_item.directory = file_directory
                file_path = reformat_item.reformat(
                    site_config.download_setup.filename_format
                )
                media_metadata.directory = file_directory
                media_metadata.filename = file_path.name
                media_metadatas.append(media_metadata)
        current_job.done = True

    async def prepare_filesystem(self, performer: user_types):
        await self.filesystem_manager.create_directory_manager(
            self.datascraper.site_config, performer
        )
        await self.filesystem_manager.format_directories(performer)
        metadata_manager = MetadataManager(
            performer,
            self.resolve_content_manager(performer),
            self.filesystem_manager,
        )
        await metadata_manager.process_legacy_metadata()
        self.metadata_manager_users[performer.id] = metadata_manager
        return metadata_manager

    async def paid_content_scraper(self, authed: auth_types):
        paid_contents = await authed.get_paid_content()
        datascraper = self.datascraper
        assert datascraper, "Datascraper not found"
        unique_suppliers: set[user_types] = set()
        for paid_content in paid_contents:
            supplier = paid_content.get_author()
            await self.prepare_filesystem(supplier)
            content_manager = datascraper.resolve_content_manager(supplier)
            content_type = paid_content.get_content_type()
            result = await datascraper.media_scraper(
                paid_content, supplier, content_type  # type:ignore
            )
            content_manager.set_content(
                content_type,
                result["content"],
            )
            unique_suppliers.add(supplier)

        for supplier in unique_suppliers:
            if isinstance(supplier, OnlyFansUserModel):
                content_manager = datascraper.resolve_content_manager(supplier)
                supplier.cache.messages.activate()
                contents = await supplier.get_mass_messages()
                supplier.cache.messages.deactivate()

                for content in contents:
                    content_type = content.get_content_type()
                    result = await datascraper.media_scraper(
                        content, supplier, content_type  # type:ignore
                    )
                    content_manager.set_content(
                        content_type,
                        result["content"],
                    )

    async def prepare_scraper(
        self,
        user: user_types,
        metadata_manager: MetadataManager,
        content_type: str,
        master_set: list[Any] | None = None,
    ):
        authed = user.get_authed()
        current_job = user.get_current_job()
        if not current_job:
            return
        # Copy so the caller's list is never mutated (and no mutable default).
        temp_master_set: list[Any] = copy.copy(master_set) if master_set else []
        if not temp_master_set and not current_job.ignore:
            match content_type:
                case "Stories":
                    temp_master_set.extend(
                        await self.datascraper.get_all_stories(user)
                    )
                case "Posts":
                    temp_master_set = await self.datascraper.get_all_posts(user)
                case "Messages":
                    db_message = await find_earliest_non_downloaded_message(
                        user, self.datascraper
                    )
                    cutoff_id = db_message.id if db_message else None
                    temp_master_set = await user.get_messages(cutoff_id=cutoff_id)
                case "Chats":
                    pass
                case "Highlights":
                    pass
                case "MassMessages":
                    if isinstance(authed, OnlyFansAuthModel):
                        if user.is_authed_user():
                            mass_message_stats = (
                                await authed.get_mass_message_stats()
                            )
                            temp_master_set = []
                            for mass_message_stat in mass_message_stats:
                                mass_message = (
                                    await mass_message_stat.get_mass_message()
                                )
                                temp_master_set.append(mass_message)
                        else:
                            db_message = await find_earliest_non_downloaded_message(
                                user, self.datascraper
                            )
                            cutoff_id = db_message.id if db_message else None
                            mass_messages = await user.get_mass_messages(
                                message_cutoff_id=cutoff_id
                            )
                            temp_master_set.extend(mass_messages)
                case _:
                    raise Exception(f"{content_type} is an invalid choice")
        # Add paid content, then remove duplicates by id.
        if isinstance(user, ultima_scraper_api.onlyfans_classes.user_model.create_user):
            for paid_content in await user.get_paid_contents(content_type):
                temp_master_set.append(paid_content)
        temp_master_set = list(
            {getattr(obj, "id"): obj for obj in temp_master_set}.values()
        )
        await self.process_scraped_content(
            temp_master_set, content_type, user, metadata_manager
        )
        current_job.done = True

    async def process_scraped_content(
        self,
        master_set: list[dict[str, Any]],
        api_type: str,
        subscription: user_types,
        metadata_manager: MetadataManager,
    ):
        if not master_set:
            return False
        tasks = [
            asyncio.create_task(
                self.datascraper.media_scraper(x, subscription, api_type)  # type:ignore
            )
            for x in master_set
        ]
        unrefined_set: list[dict[str, Any]] = await ProgressBar(
            f"Processing Scraped {api_type}"
        ).gather(tasks)
        final_content, _final_directories = (
            metadata_manager.merge_content_and_directories(unrefined_set)
        )
        if final_content:
            content_manager = self.resolve_content_manager(subscription)
            content_manager.set_content(api_type, final_content)
        else:
            print(f"No {api_type} found.")
        return True

    # Downloads scraped content
    async def prepare_downloads(
        self, performer: user_types, db_performer: DBUserModel, api_type: str
    ):
        site_db_api = self.server_manager.ultima_archive_db_api.find_site_api(
            self.datascraper.api.site_name
        )
        current_job = performer.get_current_job()
        global_settings = performer.get_api().get_global_settings()
        filesystem_manager = self.datascraper.filesystem_manager
        performer_directory_manager = filesystem_manager.get_directory_manager(
            performer.id
        )
        content_manager = self.resolve_content_manager(performer)
        db_medias = db_performer.content_manager.get_media_manager().medias
        final_download_set: set[MediaMetadata] = set()
        for db_media in db_medias.values():
            content_info = None
            if api_type == "Uncategorized":
                await db_media.awaitable_attrs.content_media_assos
                if db_media.content_media_assos:
                    continue
                if len(db_media.filepaths) > 1:
                    continue
            else:
                db_content = await db_media.find_content(api_type)
                if not db_content:
                    continue
                content_info = (db_content.id, api_type)
            db_filepath = db_media.find_filepath(content_info)
            if db_filepath:
                if api_type == "Uncategorized":
                    media_metadata = content_manager.media_manager.medias.get(
                        db_media.id
                    )
                else:
                    media_metadata = content_manager.find_media(
                        category=api_type, media_id=db_media.id
                    )
                if media_metadata and media_metadata.urls:
                    media_metadata.__db_media__ = db_media
                    final_download_set.add(media_metadata)
        total_media_count = len(final_download_set)
        download_media_count = len(
            [x for x in final_download_set if not x.get_filepath().exists()]
        )
        directory = performer_directory_manager.user.download_directory
        if final_download_set:
            string = "Processing Download:\n"
            string += (
                f"Name: {performer.username} | Type: {api_type} | "
                f"Downloading: {download_media_count} | Total: {total_media_count} | "
                f"Directory: {directory}\n"
            )
            print(string)
            download_manager = DownloadManager(
                performer.get_authed(),
                filesystem_manager,
                final_download_set,
                global_settings.tools.reformatter.active,
            )
            await download_manager.bulk_download()
        await site_db_api.schema.session.commit()
        if current_job:
            current_job.done = True

    async def manage_subscriptions(
        self,
        authed: auth_types,
        identifiers: list[int | str] | None = None,
        refresh: bool = True,
    ):
        temp_subscriptions: list[subscription_types] = []
        results = await self.datascraper.get_all_subscriptions(
            authed, identifiers or [], refresh
        )
        site_settings = authed.api.get_site_settings()
        if not site_settings:
            return temp_subscriptions
        # Sort the authed user's own profile to the front.
        results.sort(key=lambda x: x.user.is_me(), reverse=True)
        authed.subscriptions = results
        return authed.subscriptions

    async def account_setup(
        self,
        auth: auth_types,
        site_config: site_config_types,
        identifiers: list[int | str] | list[str] | None = None,
    ) -> tuple[bool, list[subscription_types]]:
        status = False
        subscriptions: list[subscription_types] = []

        if auth.is_authed() and site_config:
            authed = auth
            authed.blacklist = await authed.get_blacklist(site_config.blacklists)
            if identifiers or site_config.jobs.scrape.subscriptions:
                subscriptions.extend(
                    await self.manage_subscriptions(
                        authed, identifiers=identifiers  # type: ignore
                    )
                )
            status = True
        return status, subscriptions

    async def get_chat_users(self):
        chat_users: list[user_types] = []
        for authed in self.datascraper.api.auths.values():
            chats = await authed.get_chats()
            for chat in chats:
                username: str = chat["withUser"].username
                subscription = await authed.get_subscription(identifier=username)
                if not subscription:
                    subscription = chat["withUser"]
                chat_users.append(subscription)
        return chat_users

    async def get_performer(self, authed: auth_types, db_performer: DBUserModel):
        if authed.id == db_performer.id:
            performer = authed.user
        else:
            subscriptions = await authed.get_subscriptions(
                identifiers=[db_performer.id]
            )
            if not subscriptions:
                paid_contents = await authed.get_paid_content(
                    performer_id=db_performer.id
                )
                if not paid_contents:
                    return None
                performer = [
                    x.get_author()
                    for x in paid_contents
                    if x.get_author().id == db_performer.id
                ][0]
                temp_performer = await authed.get_user(performer.id, refresh=True)
                if not temp_performer:
                    performer.is_deleted = True
            else:
                performer = subscriptions[0].user
                if not performer.is_subscribed():
                    paid_contents = await authed.get_paid_content(
                        performer_id=db_performer.id
                    )
        if isinstance(
            performer, ultima_scraper_api.onlyfans_classes.user_model.create_user
        ):
            if performer.is_blocked:
                await performer.unblock()
        performer.add_aliases([x.username for x in db_performer.aliases])
        performer.username = performer.get_usernames()[0]
        return performer
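
A minimal usage sketch for the streamliner above (an editorial example, not repository code): it assumes `streamliner`, `performer`, and `db_performer` are already constructed by the surrounding datascraper and server managers, and that each content type has an active job, since `prepare_scraper` returns early when `get_current_job()` is None.

from ultima_scraper_collection.modules.module_streamliner import (
    StreamlinedDatascraper,
)


async def scrape_and_download(
    streamliner: StreamlinedDatascraper, performer, db_performer
):
    # Build directories and import any legacy metadata for this performer.
    metadata_manager = await streamliner.prepare_filesystem(performer)
    for content_type in ("Stories", "Posts", "Messages"):
        # Scrape the API for this content type, then download whatever the
        # archive database still marks as missing.
        await streamliner.prepare_scraper(performer, metadata_manager, content_type)
        await streamliner.prepare_downloads(performer, db_performer, content_type)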
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/projects/__init__.py
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/project_manager.py:
--------------------------------------------------------------------------------
import random
from typing import Any

import paramiko
from sqlalchemy import MetaData
from sshtunnel import SSHTunnelForwarder  # type: ignore
from ultima_scraper_db.managers.database_manager import Alembica, DatabaseManager


class Project:
    def __init__(
        self,
        name: str,
    ) -> None:
        self.name = name
        self.db_manager = DatabaseManager()

    def handle_ssh(self, db_info: dict[str, Any]):
        ssh_auth_info = db_info["ssh"]
        if ssh_auth_info["host"]:
            private_key_filepath = ssh_auth_info["private_key_filepath"]
            ssh_private_key_password = ssh_auth_info["private_key_password"]
            # SSHTunnelForwarder's ssh_pkey expects a paramiko.PKey, so pass
            # the loaded key object itself rather than its inner attribute.
            private_key = (
                paramiko.RSAKey.from_private_key_file(
                    private_key_filepath, ssh_private_key_password
                )
                if private_key_filepath
                else None
            )
            random_port = random.randint(6000, 6999)
            ssh_obj = SSHTunnelForwarder(
                (ssh_auth_info["host"], ssh_auth_info["port"]),
                ssh_username=ssh_auth_info["username"],
                ssh_pkey=private_key,
                ssh_private_key_password=ssh_private_key_password,
                remote_bind_address=(db_info["host"], db_info["port"]),
                local_bind_address=(db_info["host"], random_port),
            )
            db_info["ssh"] = ssh_obj
        else:
            db_info["ssh"] = None
        return db_info

    async def _init_db(
        self,
        db_info: dict[str, Any],
        alembica: Alembica,
        metadata: MetaData | None = None,
        echo: bool = False,
        upgrade: bool = False,
    ):
        # A fresh MetaData per call avoids sharing one mutable default
        # instance across databases.
        metadata = metadata if metadata is not None else MetaData()
        alembica.is_generate = upgrade
        alembica.is_migrate = upgrade
        db_info = self.handle_ssh(db_info)
        temp_database = self.db_manager.create_database(
            **db_info, metadata=metadata, alembica=alembica
        )
        self.db_manager.add_database(temp_database)
        await temp_database.init_db(echo)
        return temp_database
--------------------------------------------------------------------------------
/ultima_scraper_collection/projects/ultima_archive.py:
--------------------------------------------------------------------------------
from ultima_scraper_collection.config import UltimaScraperCollectionConfig
from ultima_scraper_collection.projects.project_manager import Project
from ultima_scraper_db.databases.ultima_archive import merged_metadata
from ultima_scraper_db.databases.ultima_archive.api.client import UAClient
from ultima_scraper_db.databases.ultima_archive.database_api import ArchiveAPI
from ultima_scraper_db.managers.database_manager import Alembica


class UltimaArchiveProject(Project):
    async def init(self, config: UltimaScraperCollectionConfig):
        # We could pass a database manager instead of config
        db_info = config.settings.databases[0].connection_info.dict()
        ultima_archive_db = await super()._init_db(
            db_info, Alembica(), merged_metadata
        )
        self.ultima_archive_db_api = await ArchiveAPI(ultima_archive_db).init()
        self.fast_api = UAClient(self.ultima_archive_db_api)
        UAClient.database_api = self.ultima_archive_db_api
        UAClient.config = config
        return self
--------------------------------------------------------------------------------
/ultima_scraper_collection/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DATAHOARDERS/UltimaScraperCollection/e1a85eaf8ef79c6e62d6adc20d9f3989d9b86e8c/ultima_scraper_collection/py.typed
--------------------------------------------------------------------------------
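
A hedged bootstrap sketch tying the two project classes together (an editorial example, not repository code): it assumes the default `UltimaScraperCollectionConfig` can be constructed as-is and that its first database entry points at a reachable server with no SSH tunnel configured.

import asyncio

from ultima_scraper_collection.config import UltimaScraperCollectionConfig
from ultima_scraper_collection.projects.ultima_archive import UltimaArchiveProject


async def main():
    # Real usage would load this config from a settings file.
    config = UltimaScraperCollectionConfig()
    project = UltimaArchiveProject("ultima_archive")
    await project.init(config)
    # project.ultima_archive_db_api and project.fast_api are now ready to use.


if __name__ == "__main__":
    asyncio.run(main())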