├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── LICENSE.txt ├── Procfile ├── README.md ├── app ├── __init__.py ├── helpers.py ├── main.py ├── schemas.py └── settings.py ├── cleanbay ├── __init__.py ├── abstract_plugin.py ├── backend.py ├── cache_manager │ ├── __init__.py │ ├── abstract_cache_manager.py │ └── lfu_cache.py ├── plugins │ ├── __init__.py │ ├── eztv.py │ ├── leetx.py.disabled │ ├── libgen.py │ ├── linuxtracker.py │ ├── nyaa.py │ ├── piratebay.py │ └── yts.py ├── plugins_manager │ ├── __init__.py │ └── plugins_manager.py └── torrent.py ├── poetry.lock ├── pyproject.toml └── test_app.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .env 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .nox/ 36 | .coverage 37 | .coverage.* 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | *.cover 42 | *.py,cover 43 | .hypothesis/ 44 | .pytest_cache/ 45 | cover/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Flask stuff: 52 | instance/ 53 | .webassets-cache 54 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.3.0 4 | hooks: 5 | - id: black 6 | language_version: python3.11 7 | - repo: local 8 | hooks: 9 | - id: pylint 10 | name: pylint 11 | entry: pylint 12 | language: system 13 | types: [python] 14 | args: 15 | [ 16 | "-rn", # Only display messages 17 | "-sn", # Don't display the score 18 | ] -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # This Pylint rcfile contains a best-effort configuration to uphold the 2 | # best-practices and style described in the Google Python style guide: 3 | # https://google.github.io/styleguide/pyguide.html 4 | # 5 | # Its canonical open-source location is: 6 | # https://google.github.io/styleguide/pylintrc 7 | 8 | [MAIN] 9 | 10 | # Files or directories to be skipped. They should be base names, not paths. 11 | ignore=third_party 12 | 13 | # Files or directories matching the regex patterns are skipped. The regex 14 | # matches against base names, not paths. 15 | ignore-patterns= 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=no 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | # Use multiple processes to speed up Pylint. 25 | jobs=4 26 | 27 | # Allow loading of arbitrary C extensions. Extensions are imported into the 28 | # active Python interpreter and may run arbitrary code. 29 | unsafe-load-any-extension=no 30 | 31 | 32 | [MESSAGES CONTROL] 33 | 34 | # Only show warnings with the listed confidence levels. Leave empty to show 35 | # all. 
Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 36 | confidence= 37 | 38 | # Enable the message, report, category or checker with the given id(s). You can 39 | # either give multiple identifiers separated by comma (,) or put this option 40 | # multiple times (only on the command line, not in the configuration file where 41 | # it should appear only once). See also the "--disable" option for examples. 42 | #enable= 43 | 44 | # Disable the message, report, category or checker with the given id(s). You 45 | # can either give multiple identifiers separated by comma (,) or put this 46 | # option multiple times (only on the command line, not in the configuration 47 | # file where it should appear only once). You can also use "--disable=all" to 48 | # disable everything first and then reenable specific checks. For example, if 49 | # you want to run only the similarities checker, you can use "--disable=all 50 | # --enable=similarities". If you want to run only the classes checker, but have 51 | # no Warning level messages displayed, use "--disable=all --enable=classes 52 | # --disable=W" 53 | disable=R, 54 | abstract-method, 55 | apply-builtin, 56 | arguments-differ, 57 | attribute-defined-outside-init, 58 | backtick, 59 | bad-option-value, 60 | basestring-builtin, 61 | buffer-builtin, 62 | c-extension-no-member, 63 | consider-using-enumerate, 64 | cmp-builtin, 65 | cmp-method, 66 | coerce-builtin, 67 | coerce-method, 68 | delslice-method, 69 | div-method, 70 | eq-without-hash, 71 | execfile-builtin, 72 | file-builtin, 73 | filter-builtin-not-iterating, 74 | fixme, 75 | getslice-method, 76 | global-statement, 77 | hex-method, 78 | idiv-method, 79 | implicit-str-concat, 80 | import-error, 81 | import-self, 82 | import-star-module-level, 83 | input-builtin, 84 | intern-builtin, 85 | invalid-str-codec, 86 | locally-disabled, 87 | long-builtin, 88 | long-suffix, 89 | map-builtin-not-iterating, 90 | misplaced-comparison-constant, 91 | missing-function-docstring, 92 | metaclass-assignment, 93 | next-method-called, 94 | next-method-defined, 95 | no-absolute-import, 96 | no-init, # added 97 | no-member, 98 | no-name-in-module, 99 | no-self-use, 100 | nonzero-method, 101 | oct-method, 102 | old-division, 103 | old-ne-operator, 104 | old-octal-literal, 105 | old-raise-syntax, 106 | parameter-unpacking, 107 | print-statement, 108 | raising-string, 109 | range-builtin-not-iterating, 110 | raw_input-builtin, 111 | rdiv-method, 112 | reduce-builtin, 113 | relative-import, 114 | reload-builtin, 115 | round-builtin, 116 | setslice-method, 117 | signature-differs, 118 | standarderror-builtin, 119 | suppressed-message, 120 | sys-max-int, 121 | trailing-newlines, 122 | unichr-builtin, 123 | unicode-builtin, 124 | unnecessary-pass, 125 | unpacking-in-except, 126 | useless-else-on-loop, 127 | useless-suppression, 128 | using-cmp-argument, 129 | wrong-import-order, 130 | xrange-builtin, 131 | zip-builtin-not-iterating, 132 | 133 | 134 | [REPORTS] 135 | 136 | # Set the output format. Available formats are text, parseable, colorized, msvs 137 | # (visual studio) and html. You can also give a reporter class, e.g. 138 | # mypackage.mymodule.MyReporterClass. 139 | output-format=text 140 | 141 | # Tells whether to display a full report or only the messages 142 | reports=no 143 | 144 | # Python expression which should return a note less than 10 (10 is the highest 145 | # note). 
You have access to the variables 'error', 'warning', 'refactor', 'convention' 146 | # and 'statement', which respectively contain the number of messages in each 147 | # category and the total number of statements analyzed. This is used by the 148 | # global evaluation report (RP0004). 149 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 150 | 151 | # Template used to display messages. This is a python new-style format string 152 | # used to format the message information. See doc for all details 153 | #msg-template= 154 | 155 | 156 | [BASIC] 157 | 158 | # Good variable names which should always be accepted, separated by a comma 159 | good-names=main,_ 160 | 161 | # Bad variable names which should always be refused, separated by a comma 162 | bad-names= 163 | 164 | # Colon-delimited sets of names that determine each other's naming style when 165 | # the name regexes allow several styles. 166 | name-group= 167 | 168 | # Include a hint for the correct naming format with invalid-name 169 | include-naming-hint=no 170 | 171 | # List of decorators that produce properties, such as abc.abstractproperty. Add 172 | # to this list to register other decorators that produce valid properties. 173 | property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl 174 | 175 | # Regular expression matching correct function names 176 | function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$ 177 | 178 | # Regular expression matching correct variable names 179 | variable-rgx=^[a-z][a-z0-9_]*$ 180 | 181 | # Regular expression matching correct constant names 182 | const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 183 | 184 | # Regular expression matching correct attribute names 185 | attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ 186 | 187 | # Regular expression matching correct argument names 188 | argument-rgx=^[a-z][a-z0-9_]*$ 189 | 190 | # Regular expression matching correct class attribute names 191 | class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 192 | 193 | # Regular expression matching correct inline iteration names 194 | inlinevar-rgx=^[a-z][a-z0-9_]*$ 195 | 196 | # Regular expression matching correct class names 197 | class-rgx=^_?[A-Z][a-zA-Z0-9]*$ 198 | 199 | # Regular expression matching correct module names 200 | module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ 201 | 202 | # Regular expression matching correct method names 203 | method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$ 204 | 205 | # Regular expression which should only match function or class names that do 206 | # not require a docstring. 207 | no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ 208 | 209 | # Minimum line length for functions/classes that require docstrings, shorter 210 | # ones are exempt. 211 | docstring-min-length=12 212 | 213 | 214 | [TYPECHECK] 215 | 216 | # List of decorators that produce context managers, such as 217 | # contextlib.contextmanager. Add to this list to register other decorators that 218 | # produce valid context managers. 
219 | contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager 220 | 221 | # List of module names for which member attributes should not be checked 222 | # (useful for modules/projects where namespaces are manipulated during runtime 223 | # and thus existing member attributes cannot be deduced by static analysis). It 224 | # supports qualified module names, as well as Unix pattern matching. 225 | ignored-modules= 226 | 227 | # List of class names for which member attributes should not be checked (useful 228 | # for classes with dynamically set attributes). This supports the use of 229 | # qualified names. 230 | ignored-classes=optparse.Values,thread._local,_thread._local 231 | 232 | # List of members which are set dynamically and missed by pylint inference 233 | # system, and so shouldn't trigger E1101 when accessed. Python regular 234 | # expressions are accepted. 235 | generated-members= 236 | 237 | 238 | [FORMAT] 239 | 240 | # Maximum number of characters on a single line. 241 | max-line-length=88 242 | 243 | # TODO(https://github.com/pylint-dev/pylint/issues/3352): Direct pylint to exempt 244 | # lines made too long by directives to pytype. 245 | 246 | # Regexp for a line that is allowed to be longer than the limit. 247 | ignore-long-lines=(?x)( 248 | ^\s*(\#\ )?<?https?://\S+>?$| 249 | ^\s*(from\s+\S+\s+)?import\s+.+$) 250 | 251 | # Allow the body of an if to be on the same line as the test if there is no 252 | # else. 253 | single-line-if-stmt=yes 254 | 255 | # Maximum number of lines in a module 256 | max-module-lines=99999 257 | 258 | # String used as indentation unit. The internal Google style guide mandates 2 259 | # spaces. Google's externally-published style guide says 4, consistent with 260 | # PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google 261 | # projects (like TensorFlow). 262 | indent-string=' ' 263 | 264 | # Number of spaces of indent required inside a hanging or continued line. 265 | indent-after-paren=4 266 | 267 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 268 | expected-line-ending-format= 269 | 270 | 271 | [MISCELLANEOUS] 272 | 273 | # List of note tags to take in consideration, separated by a comma. 274 | notes=TODO 275 | 276 | 277 | [STRING] 278 | 279 | # This flag controls whether inconsistent-quotes generates a warning when the 280 | # character used as a quote delimiter is used inconsistently within a module. 281 | check-quote-consistency=yes 282 | 283 | 284 | [VARIABLES] 285 | 286 | # Tells whether we should check for unused import in __init__ files. 287 | init-import=no 288 | 289 | # A regular expression matching the name of dummy variables (i.e. expectedly 290 | # not used). 291 | dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) 292 | 293 | # List of additional names supposed to be defined in builtins. Remember that 294 | # you should avoid defining new builtins when possible. 295 | additional-builtins= 296 | 297 | # List of strings which can identify a callback function by name. A callback 298 | # name must start or end with one of those strings. 299 | callbacks=cb_,_cb 300 | 301 | # List of qualified module names which can have objects that can redefine 302 | # builtins. 
303 | redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools 304 | 305 | 306 | [LOGGING] 307 | 308 | # Logging modules to check that the string format arguments are in logging 309 | # function parameter format 310 | logging-modules=logging,absl.logging,tensorflow.io.logging 311 | 312 | 313 | [SIMILARITIES] 314 | 315 | # Minimum number of lines of a similarity. 316 | min-similarity-lines=4 317 | 318 | # Ignore comments when computing similarities. 319 | ignore-comments=yes 320 | 321 | # Ignore docstrings when computing similarities. 322 | ignore-docstrings=yes 323 | 324 | # Ignore imports when computing similarities. 325 | ignore-imports=no 326 | 327 | 328 | [SPELLING] 329 | 330 | # Spelling dictionary name. Available dictionaries: none. To make it work, 331 | # install the python-enchant package. 332 | spelling-dict= 333 | 334 | # List of comma separated words that should not be checked. 335 | spelling-ignore-words= 336 | 337 | # A path to a file that contains private dictionary; one word per line. 338 | spelling-private-dict-file= 339 | 340 | # Tells whether to store unknown words to indicated private dictionary in 341 | # --spelling-private-dict-file option instead of raising a message. 342 | spelling-store-unknown-words=no 343 | 344 | 345 | [IMPORTS] 346 | 347 | # Deprecated modules which should not be used, separated by a comma 348 | deprecated-modules=regsub, 349 | TERMIOS, 350 | Bastion, 351 | rexec, 352 | sets 353 | 354 | # Create a graph of all (i.e. internal and external) dependencies in the 355 | # given file (report RP0402 must not be disabled) 356 | import-graph= 357 | 358 | # Create a graph of external dependencies in the given file (report RP0402 must 359 | # not be disabled) 360 | ext-import-graph= 361 | 362 | # Create a graph of internal dependencies in the given file (report RP0402 must 363 | # not be disabled) 364 | int-import-graph= 365 | 366 | # Force import order to recognize a module as part of the standard 367 | # compatibility libraries. 368 | known-standard-library= 369 | 370 | # Force import order to recognize a module as part of a third party library. 371 | known-third-party=enchant, absl 372 | 373 | # Analyse import fallback blocks. This can be used to support both Python 2 and 374 | # 3 compatible code, which means that the block might have code that exists 375 | # only in one or another interpreter, leading to false positives when analysed. 376 | analyse-fallback-blocks=no 377 | 378 | 379 | [CLASSES] 380 | 381 | # List of method names used to declare (i.e. assign) instance attributes. 382 | defining-attr-methods=__init__, 383 | __new__, 384 | setUp 385 | 386 | # List of member names, which should be excluded from the protected access 387 | # warning. 388 | exclude-protected=_asdict, 389 | _fields, 390 | _replace, 391 | _source, 392 | _make 393 | 394 | # List of valid names for the first argument in a class method. 395 | valid-classmethod-first-arg=cls, 396 | class_ 397 | 398 | # List of valid names for the first argument in a metaclass class method. 
399 | valid-metaclass-classmethod-first-arg=mcs 400 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gr3atWh173 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -w 3 -k uvicorn.workers.UvicornWorker app.main:app -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cleanbay 2 | 3 | A metasearch engine for torrents 4 | 5 | **Note**: The frontend code lives in 6 | [this repo](https://github.com/gr3atwh173/cleanbay-frontend) 7 | 8 | 0. [Supported trackers](#supported-trackers) 9 | 1. [Live instances](#live-instances) 10 | 2. [Setup](#setup) 11 | 3. [API endpoints](#api-endpoints) 12 | 4. [Contributing](#contributing) 13 | 14 | ## Supported trackers 15 | 16 | Currently supported trackers are: 17 | 18 | 1. Piratebay 19 | 2. YTS 20 | 3. EZTV 21 | 4. LinuxTracker 22 | 5. Libgen 23 | 6. Nyaa 24 | 25 | ## Live instances 26 | 27 | You can find a running instance at: 28 | 29 | 1. https://testbay.onrender.com or, 30 | 2. https://cleanbay.netlify.app if you prefer a frontend 31 | 32 | ## Setup 33 | 34 | 1. Clone this repo 35 | 36 | ``` 37 | git clone https://github.com/gr3atwh173/cleanbay.git 38 | ``` 39 | 40 | 2. Install with [Poetry](https://pypi.org/project/poetry/) 41 | 42 | ``` 43 | cd cleanbay 44 | poetry install 45 | ``` 46 | 47 | _Optional_: Create a `.env` file with the following parameters in the project 48 | root: 49 | 50 | ``` 51 | # directory where the plugins are located 52 | # must have an __init__.py file 53 | PLUGINS_DIRECTORY="./cleanbay/plugins" 54 | 55 | # rate limiting by IP 56 | RATE_LIMIT="100/minute" 57 | 58 | # cache size in 'entries' 59 | CACHE_SIZE=128 60 | 61 | # time (in seconds) before a cache item is invalidated 62 | CACHE_TIMEOUT=300 63 | 64 | # domain allowed to make cross-origin requests to the server 65 | # '*' allows for any domain to request data 66 | ALLOWED_ORIGIN="*" 67 | ``` 68 | 
69 | 3. Run the web API 70 | 71 | ``` 72 | poetry run uvicorn app.main:app 73 | ``` 74 | 75 | ## API endpoints 76 | 77 | >**NOTE**: See the [auto-generated swagger docs](https://testbay.onrender.com/docs) for more up-to-date documentation 78 | 79 | 1. `POST /api/v1/search/` expects 80 | 81 | ```json 82 | { 83 | "search_term": "...", 84 | "include_categories": ["cinema", "tv"], 85 | "exclude_categories": [], 86 | "include_sites": ["linuxtracker", "piratebay"], 87 | "exclude_sites": [] 88 | } 89 | ``` 90 | 91 | and returns JSON with the following structure: 92 | 93 | ```json 94 | { 95 | "status": "ok", 96 | "length": 123, 97 | "cache_hit": true, 98 | "elapsed": 2.324, 99 | "data": [ 100 | { 101 | "name": "...", 102 | "magnet": "...", 103 | "seeders": 12345, 104 | "leechers": 1234, 105 | "size": "...", 106 | "uploader": "...", 107 | "uploaded_at": "..." 108 | } 109 | ] 110 | } 111 | ``` 112 | 113 | in case of an error, the following is returned: 114 | 115 | ```json 116 | { 117 | "status": "error", 118 | "msg": "why it happened" 119 | } 120 | ``` 121 | 122 | --- 123 | 124 | **NOTE** 125 | 126 | Categories are mapped like so: 127 | 128 | ``` 129 | "all" or "*" => ALL: Everything under the sun 130 | "general" => GENERAL: Plugins that track everything 131 | "cinema" => CINEMA: Plugins that track movies 132 | "tv" => TV: Plugins that track shows on TV, OTT or anything that's not a movie 133 | "software" => SOFTWARE: Plugins that track software excluding games 134 | "books" => BOOKS: Plugins that index books or audiobooks 135 | ``` 136 | 137 | --- 138 | 139 | 2. `GET /api/v1/status` returns JSON with the following structure 140 | 141 | ```json 142 | { 143 | "status": "ok", // or "not ok" 144 | "plugins": ["loaded", "plugins"] 145 | } 146 | ``` 147 | 148 | ## Contributing 149 | 150 | ### How you can contribute 151 | 152 | This is a non-exhaustive list: 153 | 154 | 1. Make a plugin (or two, or three, or four...) 155 | 2. Add new features to the backend, or make existing ones better! 156 | 3. Make a better frontend. 157 | 4. Write better documentation for the API. 158 | 5. Bug fixes, refactors, etc. 159 | 6. Suggest a feature. 160 | 161 | In any case, thanks for contributing! 162 | 163 | ### How to contribute 164 | 165 | Before making a change, please first discuss it by raising an issue. 166 | 167 | 1. Fork and clone the repo 168 | 2. Run `poetry install` to install the dependencies 169 | 3. Create a branch for your PR with `git checkout -b your-branch-name` 170 | 4. Code your changes 171 | 5. Push the changes to your fork 172 | 6. Make a pull request!
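173 | 174 | ### Running the tests 175 | 176 | For a quick sanity check before opening a PR, the suite in `test_app.py` can be run through Poetry (this assumes the test dependencies were installed by `poetry install`): 177 | 178 | ``` 179 | poetry run pytest 180 | ```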
181 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/app/__init__.py -------------------------------------------------------------------------------- /app/helpers.py: -------------------------------------------------------------------------------- 1 | """Contains helper functions for the API""" 2 | 3 | from typing import Tuple 4 | 5 | from cleanbay.torrent import Category 6 | 7 | from app.schemas import SearchIn, CATEGORY_MAP 8 | 9 | 10 | def parse_search_query(sq: SearchIn) -> Tuple: 11 | s_term = sq.search_term 12 | 13 | # if there's 'all' in the include category list, treat it as if the list was 14 | # empty, i.e., include everything 15 | i_cats = [] 16 | if any(x in sq.include_categories for x in ["all", "*"]): 17 | i_cats = list(CATEGORY_MAP.values()) 18 | i_cats.remove(Category.ALL) 19 | else: 20 | i_cats = [CATEGORY_MAP[cat] for cat in sq.include_categories] 21 | 22 | e_cats = [CATEGORY_MAP[cat] for cat in sq.exclude_categories] 23 | 24 | i_sites = sq.include_sites 25 | e_sites = sq.exclude_sites 26 | 27 | return (s_term, i_cats, e_cats, i_sites, e_sites)
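28 | 29 | 30 | # Illustrative sketch only (not used by the app): shows how `parse_search_query` 31 | # flattens a `SearchIn` into the positional arguments `Backend.search` expects. 32 | if __name__ == "__main__": 33 | query = SearchIn(search_term="debian", include_categories=["software"]) 34 | s_term, i_cats, e_cats, i_sites, e_sites = parse_search_query(query) 35 | assert i_cats == [Category.SOFTWARE] 36 | print(s_term, i_cats, i_sites, e_sites)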
plugins""" 55 | plugins, is_ok = backend.state() 56 | status_word = "ok" if is_ok else "not ok" 57 | 58 | return StatusOut(status=status_word, plugins=list(plugins)) 59 | 60 | 61 | @app.post( 62 | "/api/v1/search", 63 | response_model=SearchOut, 64 | responses={422: {"model": SearchError}}, 65 | ) 66 | @limiter.limit(settings.rate_limit) 67 | async def search( 68 | request: Request, response: Response, sq: SearchIn 69 | ): # pylint: disable=unused-argument 70 | """Searches the relevant plugins for torrents""" 71 | is_valid, msg = validate(sq) 72 | if not is_valid: 73 | response.status_code = 422 74 | raise HTTPException(status_code=422, detail=msg) 75 | 76 | s_term, i_cats, e_cats, i_sites, e_sites = parse_search_query(sq) 77 | 78 | start_time = datetime.now() 79 | try: 80 | listings, cache_hit = await backend.search( 81 | search_term=s_term, 82 | include_categories=i_cats, 83 | exclude_categories=e_cats, 84 | include_sites=i_sites, 85 | exclude_sites=e_sites, 86 | ) 87 | except NoPluginsError as exc: 88 | raise HTTPException(status_code=500, detail="No searchable plugins.") from exc 89 | except InvalidSearchError as exc: 90 | response.status_code = 400 91 | raise HTTPException(status_code=422, detail="Invalid search.") from exc 92 | elapsed = datetime.now() - start_time 93 | 94 | return SearchOut( 95 | status="ok", data=listings, cache_hit=cache_hit, elapsed=elapsed.total_seconds() 96 | ) 97 | 98 | 99 | def validate(sq: SearchIn) -> bool: 100 | indexed_sites = list(backend.state()[0]) 101 | for site in chain(sq.include_sites, sq.exclude_sites): 102 | if site not in indexed_sites: 103 | or_string = f'{", ".join(indexed_sites[:-1])} or {indexed_sites[-1]}' 104 | return ( 105 | False, 106 | f'For now, "{site}" is not indexed. Perhaps you meant {or_string}', 107 | ) 108 | return True, "" 109 | 110 | 111 | if __name__ == "__main__": 112 | import uvicorn 113 | 114 | uvicorn.run(app, reload=True) 115 | -------------------------------------------------------------------------------- /app/schemas.py: -------------------------------------------------------------------------------- 1 | """Contains the request and response models for the API""" 2 | 3 | from typing import List 4 | 5 | from fastapi import HTTPException 6 | 7 | from pydantic import BaseModel, field_validator, model_validator, computed_field 8 | 9 | from cleanbay.torrent import Category, Torrent 10 | 11 | CATEGORY_MAP = { 12 | "all": Category.ALL, 13 | "general": Category.GENERAL, 14 | "cinema": Category.CINEMA, 15 | "tv": Category.TV, 16 | "software": Category.SOFTWARE, 17 | "books": Category.BOOKS, 18 | } 19 | 20 | 21 | class SearchIn(BaseModel): 22 | """Used to deserialize the JSON received in the request body 23 | 24 | Attributes: 25 | search_term (str): The string to search for 26 | include_categories (list): Categories in which to search 27 | exclude_categories (list): Categories in which to not search 28 | include_sites (list): Plugins/services to search 29 | exclude_sites (list): Plugins/services to not search 30 | 31 | """ 32 | 33 | search_term: str 34 | include_categories: List[str] = [] 35 | exclude_categories: List[str] = [] 36 | include_sites: List[str] = [] 37 | exclude_sites: List[str] = [] 38 | 39 | @field_validator("search_term") 40 | @classmethod 41 | def validate_search_term_not_empty(cls, search_term: str) -> str: 42 | if search_term.strip() == "": 43 | raise HTTPException(status_code=422, detail="No search term given.") 44 | return search_term 45 | 46 | @field_validator("include_categories", "exclude_categories") 47 
120 | -------------------------------------------------------------------------------- /app/schemas.py: -------------------------------------------------------------------------------- 1 | """Contains the request and response models for the API""" 2 | 3 | from typing import List 4 | 5 | from fastapi import HTTPException 6 | 7 | from pydantic import BaseModel, field_validator, model_validator, computed_field 8 | 9 | from cleanbay.torrent import Category, Torrent 10 | 11 | CATEGORY_MAP = { 12 | "all": Category.ALL, 13 | "general": Category.GENERAL, 14 | "cinema": Category.CINEMA, 15 | "tv": Category.TV, 16 | "software": Category.SOFTWARE, 17 | "books": Category.BOOKS, 18 | } 19 | 20 | 21 | class SearchIn(BaseModel): 22 | """Used to deserialize the JSON received in the request body 23 | 24 | Attributes: 25 | search_term (str): The string to search for 26 | include_categories (list): Categories in which to search 27 | exclude_categories (list): Categories in which to not search 28 | include_sites (list): Plugins/services to search 29 | exclude_sites (list): Plugins/services to not search 30 | 31 | """ 32 | 33 | search_term: str 34 | include_categories: List[str] = [] 35 | exclude_categories: List[str] = [] 36 | include_sites: List[str] = [] 37 | exclude_sites: List[str] = [] 38 | 39 | @field_validator("search_term") 40 | @classmethod 41 | def validate_search_term_not_empty(cls, search_term: str) -> str: 42 | if search_term.strip() == "": 43 | raise HTTPException(status_code=422, detail="No search term given.") 44 | return search_term 45 | 46 | @field_validator("include_categories", "exclude_categories") 47 | @classmethod 48 | def validate_category_names(cls, category_list: list) -> list: 49 | invalid_categories = list( 50 | filter(lambda cat: cat not in CATEGORY_MAP, category_list) 51 | ) 52 | if invalid_categories: 53 | categories = list(CATEGORY_MAP.keys()) 54 | or_string = f"{', '.join(categories[:-1])} or {categories[-1]}" 55 | raise HTTPException( 56 | status_code=422, 57 | # pylint: disable=line-too-long 58 | detail=f"No such categories: {', '.join(invalid_categories)}. Perhaps you meant {or_string}", 59 | ) 60 | return category_list 61 | 62 | @model_validator(mode="after") 63 | def validate_filter_variant_exclusivity(self) -> "SearchIn": 64 | if self.include_categories and self.exclude_categories: 65 | raise HTTPException( 66 | status_code=422, 67 | detail="Cannot use include and exclude categories together.", 68 | ) 69 | if self.include_sites and self.exclude_sites: 70 | raise HTTPException( 71 | status_code=422, detail="Cannot use include and exclude sites together." 72 | ) 73 | return self 74 | 75 | 76 | class SearchOut(BaseModel): 77 | status: str = "ok" 78 | cache_hit: bool 79 | elapsed: float 80 | data: List[Torrent] 81 | 82 | @computed_field 83 | @property 84 | def length(self) -> int: 85 | return len(self.data) 86 | 87 | 88 | class SearchError(BaseModel): 89 | status: str 90 | msg: str 91 | 92 | 93 | class StatusOut(BaseModel): 94 | status: str 95 | plugins: List[str] 96 | -------------------------------------------------------------------------------- /app/settings.py: -------------------------------------------------------------------------------- 1 | """Contains the app settings""" 2 | 3 | from pydantic_settings import BaseSettings 4 | 5 | 6 | class Settings(BaseSettings): 7 | """Loads and represents app config from environment variables. 8 | 9 | Attributes: 10 | plugins_directory (str): The directory where plugin files are stored 11 | cache_size (int): Size for the cache 12 | cache_timeout (int): How long the cache maintains an entry (in seconds) 13 | session_timeout (int): Timeout for requests to external services (in seconds) 14 | rate_limit (str): Rate limit descriptor 15 | allowed_origin (str): Origin from which requests are allowed 16 | 17 | """ 18 | 19 | plugins_directory: str = "./cleanbay/plugins" 20 | cache_size: int = 128 21 | cache_timeout: int = 300 22 | session_timeout: int = 8 23 | rate_limit: str = "100/minute" 24 | allowed_origin: str = "*" 25 | 26 | 27 | settings = Settings() 28 | -------------------------------------------------------------------------------- /cleanbay/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/cleanbay/__init__.py -------------------------------------------------------------------------------- /cleanbay/abstract_plugin.py: -------------------------------------------------------------------------------- 1 | """The module contains the abstract interface for plugins""" 2 | from abc import ABC, abstractmethod 3 | import asyncio # pylint: disable=unused-import 4 | import aiohttp 5 | 6 | 7 | class AbstractPlugin(ABC): 8 | """All plugins must be derived from this abstract class.""" 9 | 10 | @abstractmethod 11 | def verify_status(self) -> bool: 12 | """Verifies the status of the external service used by the plugin. 13 | 14 | Returns `True` only if said service is online and usable; 15 | `False` otherwise. 16 | 17 | """ 18 | pass 19 | 20 | @abstractmethod 21 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 22 | """Searches the external service. 23 | 24 | Args: 25 | session (aiohttp.ClientSession): a session object that the plugin can use 26 | to access the web. 27 | search_param (str): the string to search for. 28 | 29 | """ 30 | pass 31 | 32 | @abstractmethod 33 | def info(self) -> dict: 34 | """Gives metadata about the plugin 35 | 36 | Must include 'name' and 'category' keys. 37 | 38 | """ 39 | pass
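40 | 41 | 42 | # Illustrative sketch only (not loaded by the app): a minimal concrete plugin. 43 | # Real plugins live in cleanbay/plugins, must expose a class named `CBPlugin`, 44 | # and return a list of `Torrent` objects; the endpoint and JSON shape below are 45 | # hypothetical. 46 | from .torrent import Torrent, Category # pylint: disable=wrong-import-position 47 | 48 | 49 | class ExamplePlugin(AbstractPlugin): 50 | def verify_status(self) -> bool: 51 | return True 52 | 53 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 54 | resp = await session.get(f"https://example.invalid/api?q={search_param}") 55 | return [ 56 | Torrent(r["name"], r["magnet"], r["seeders"], r["leechers"], r["size"], "example", r["date"]) 57 | for r in await resp.json() 58 | ] 59 | 60 | def info(self) -> dict: 61 | return {"name": "example", "category": Category.GENERAL}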
62 | -------------------------------------------------------------------------------- /cleanbay/backend.py: -------------------------------------------------------------------------------- 1 | """Manages the plugins and the cache.""" 2 | 3 | import asyncio 4 | 5 | from aiohttp import ClientSession, ClientTimeout, TCPConnector 6 | 7 | from typing import Tuple 8 | 9 | from .cache_manager import AbstractCacheManager 10 | from .plugins_manager import PluginsManager 11 | 12 | 13 | class InvalidSearchError(Exception): 14 | """Indicates that the search parameters were invalid.""" 15 | 16 | pass 17 | 18 | 19 | class Backend: 20 | """This class handles all behind-the-scenes logic. 21 | 22 | Handles searching each of the loaded plugins asynchronously and caching 23 | the results. 24 | 25 | Attributes: 26 | timeout (int): Timeout for requests to external services (in seconds). 27 | cache (AbstractCacheManager): The cache implementation in use. 28 | plugins_manager (PluginsManager): Loads, stores, and filters the plugins. 29 | 30 | """ 31 | 32 | def __init__( 33 | self, 34 | request_timeout: int, 35 | cache_manager: AbstractCacheManager, 36 | plugins_manager: PluginsManager, 37 | ): 38 | """Initializes the backend object. 39 | 40 | Arguments: 41 | request_timeout (int): Timeout for requests to external services (in seconds) 42 | cache_manager (AbstractCacheManager): A concrete impl for a cache 43 | plugins_manager (PluginsManager): A concrete impl for managing plugins. 44 | 45 | """ 46 | self.timeout = request_timeout 47 | self.cache = cache_manager 48 | self.plugins_manager = plugins_manager 49 | 50 | def state(self): 51 | plugins = self.plugins_manager.plugins.keys() 52 | is_ok = bool(plugins) 53 | 54 | return (plugins, is_ok)
55 | 56 | async def search( 57 | self, 58 | search_term: str, 59 | include_categories: list, 60 | exclude_categories: list, 61 | include_sites: list, 62 | exclude_sites: list, 63 | ) -> Tuple: 64 | """Searches the relevant plugins for torrents. 65 | 66 | Looks in the cache first. Ideally finds the listings there. 67 | 68 | In case of a miss, invokes the search method of each plugin (which might 69 | be time consuming). 70 | 71 | Note: 72 | 1. This will cause the cache to update in case of a miss. Which, if it is 73 | full, might cause even more delay. 74 | 2. For each filter, use either the include or the exclude variant. Using 75 | both raises `InvalidSearchError`. 76 | 77 | Args: 78 | search_term (str): The string to search for. 79 | include_categories (list): Categories of plugins to search 80 | exclude_categories (list): Categories of plugins to not search 81 | include_sites (list): Names of services to search 82 | exclude_sites (list): Names of services to not search 83 | 84 | Returns: 85 | A tuple in the form ([], bool). The bool is True in case of a cache hit, 86 | False otherwise. 87 | 88 | Raises: 89 | InvalidSearchError: if both the include and exclude variants of a filter 90 | are used together. 91 | NoPluginsError: if no usable plugins are loaded. 92 | 93 | """ 94 | # should not be using include and exclude together 95 | if include_categories and exclude_categories or include_sites and exclude_sites: 96 | raise InvalidSearchError() 97 | 98 | search_term = search_term.lower() 99 | 100 | plugins = self.plugins_manager.filter_plugins( 101 | include_categories, exclude_categories, include_sites, exclude_sites 102 | ) 103 | 104 | results, cache_hit = self.try_cache(search_term, plugins) 105 | if not cache_hit: 106 | results = await self.update_cache(search_term, plugins) 107 | 108 | return (results, cache_hit) 109 | 110 | def try_cache(self, search_param: str, plugins: list) -> Tuple: 111 | """Returns the listings from the cache. 112 | 113 | Args: 114 | search_param (str): The string to search for. 115 | plugins (list): Plugin objects implementing the `search()` method. 116 | 117 | Returns: 118 | A tuple containing a list of torrents and a bool denoting if there was a 119 | cache hit or not. 120 | 121 | """ 122 | cache_hit = self.cache.read(search_param, plugins) 123 | 124 | if not cache_hit: 125 | return [], False 126 | 127 | return cache_hit, True 128 | 129 | async def update_cache(self, search_param: str, plugins: list) -> list: 130 | """Updates the cache. 131 | 132 | Searches each plugin in the category and puts its results into the cache. 133 | 134 | Note: 135 | If the cache has grown to its configured maximum size, the least 136 | frequently used entry is evicted to make room for the new one. 137 | 138 | Args: 139 | search_param (str): the string to search for. 140 | plugins (list): Plugin objects implementing the `search()` method. 141 | 142 | Returns: 143 | List of torrents matching the search query 144 | 145 | """ 146 | results = await self.search_plugins(search_param, plugins) 147 | 148 | if not results: 149 | return [] 150 | 151 | self.cache.store(search_param, plugins, results) 152 | 153 | return results 154 | 155 | async def search_plugins(self, search_param: str, plugins: list) -> list: 156 | """Searches the given plugins concurrently. 157 | 158 | This is an asynchronous function that fires off the plugins, which, in 159 | turn, send off HTTP requests, parse the results, and return their 160 | respective listings. 161 | 162 | Args: 163 | search_param (str): the string to search for. 164 | plugins (list): Plugin objects implementing the `search()` method. 165 | 166 | Returns: 167 | A list of compiled results from the specified plugins. 168 | 169 | """ 170 | results = [] 171 | 172 | session_timeout = ClientTimeout(total=self.timeout) 173 | async with ClientSession( 174 | connector=TCPConnector(ssl=False), timeout=session_timeout 175 | ) as session: 176 | tasks = self.create_search_tasks(session, search_param, plugins) 177 | results = await asyncio.gather(*tasks, return_exceptions=True) 178 | 179 | results = self.exclude_errors(results) 180 | 181 | return self.flatten(results) 182 | 183 | def create_search_tasks( 184 | self, session: ClientSession, search_param: str, plugins: list 185 | ) -> list: 186 | """Creates async tasks for each plugin""" 187 | tasks = [] 188 | for plugin in plugins: 189 | search_future = plugin.search(session, search_param) 190 | task = asyncio.create_task(search_future) 191 | tasks.append(task) 192 | 193 | return tasks 194 | 195 | def exclude_errors(self, listings: list): 196 | return [listing for listing in listings if isinstance(listing, list)] 197 | 198 | def flatten(self, t: list) -> list: 199 | return [item for sublist in t for item in sublist]
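200 | 201 | # Illustrative wiring (mirrors app/main.py; the timeout and cache sizes here 202 | # are example values): 203 | # 204 | # from cleanbay.cache_manager import LFUCache 205 | # from cleanbay.plugins_manager import PluginsManager 206 | # 207 | # backend = Backend(8, LFUCache(128, 300), PluginsManager("./cleanbay/plugins")) 208 | # listings, hit = asyncio.run(backend.search("debian", [], [], [], []))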
209 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring 2 | from .abstract_cache_manager import AbstractCacheManager 3 | from .lfu_cache import LFUCache 4 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/abstract_cache_manager.py: -------------------------------------------------------------------------------- 1 | """Contains the cache manager interface/abstract class""" 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class AbstractCacheManager(ABC): 6 | """All cache managers must be derived from this class""" 7 | 8 | @abstractmethod 9 | def store(self, search_term: str, plugins: list, listings: list): 10 | """Stores a search result into the cache. 11 | 12 | Arguments: 13 | search_term (str): The string that was searched. 14 | plugins (list): List of Plugin objects used in the search. 15 | listings (list): List of Torrents returned from the search. 16 | 17 | """ 18 | pass 19 | 20 | @abstractmethod 21 | def read(self, search_term: str, plugins: list) -> list: 22 | """Reads an item from the cache. 23 | 24 | Arguments: 25 | search_term (str): The string that was searched. 26 | plugins (list): List of Plugin objects used in the search. 27 | 28 | Returns: 29 | A list of Torrents. 30 | 31 | """ 32 | pass 33 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/lfu_cache.py: --------------------------------------------------------------------------------
1 | """Contains the implementation for LFU-based cache manager""" 2 | from datetime import datetime, timedelta 3 | 4 | from typing import Tuple 5 | 6 | from cleanbay.cache_manager.abstract_cache_manager import AbstractCacheManager 7 | 8 | 9 | class LFUCache(AbstractCacheManager): 10 | """Manages an LFU cache with a timeout. 11 | 12 | Attributes: 13 | lines (dict): Cache items hashed by the tuple of the search term and the 14 | names of the plugins utilized in the search. 15 | max_size (int): Maximum number of entries in the cache. 16 | timeout (timedelta): Time in seconds after which a cache entry is 17 | invalidated. 18 | 19 | """ 20 | 21 | def __init__(self, max_size: int, timeout: int): 22 | """Initializes the cache. 23 | 24 | Arguments: 25 | max_size (int): Maximum number of entries in the cache. 26 | timeout (int): Time in seconds after which a cache entry is invalidated. 27 | 28 | """ 29 | self.lines = {} 30 | self.max_size = max_size 31 | self.timeout = timedelta(seconds=timeout) 32 | 33 | def store(self, search_term: str, plugins: list, listings: list): 34 | """Stores a search result into the cache. 35 | 36 | If the cache is at its maximum size, the least frequently used item is 37 | deleted before storing the incoming item. 38 | 39 | Arguments: 40 | search_term (str): The string that was searched. 41 | plugins (list): List of Plugin objects used in the search. 42 | listings (list): List of Torrents returned from the search. 43 | 44 | """ 45 | if len(self.lines) == self.max_size: 46 | lfu = self.least_frequently_used() 47 | del self.lines[lfu] 48 | 49 | key = self.make_key(search_term, plugins) 50 | self.lines[key] = { 51 | "listings": listings, 52 | "hit_count": 1, 53 | "store_time": datetime.now(), 54 | } 55 | 56 | def read(self, search_term: str, plugins: list) -> list: 57 | """Reads an item from the cache. 58 | 59 | If the item is in the cache, its 'hit_count' is increased by 1. In case of 60 | a cache miss, an empty list is returned. 61 | 62 | Arguments: 63 | search_term (str): The string that was searched. 64 | plugins (list): List of Plugin objects used in the search. 65 | 66 | Returns: 67 | A list of Torrents. 68 | 69 | """ 70 | key = self.make_key(search_term, plugins) 71 | 72 | if key not in self.lines: 73 | return [] 74 | if not self.is_valid(self.lines[key]): 75 | return [] 76 | 77 | self.lines[key]["hit_count"] += 1 78 | return self.lines[key]["listings"] 79 | 80 | def is_valid(self, line: dict) -> bool: 81 | """Checks if the cache item has timed out. 82 | 83 | Arguments: 84 | line (dict): The cache line to check. 85 | 86 | Returns: 87 | True if the item hasn't timed out. False otherwise. 88 | """ 89 | current_time = datetime.now() 90 | store_time = line["store_time"] 91 | 92 | return current_time - store_time < self.timeout 93 | 94 | def make_key(self, search_term: str, plugins: list) -> Tuple: 95 | names = [plugin.info()["name"] for plugin in plugins] 96 | return (search_term, frozenset(names)) 97 | 98 | def least_frequently_used(self): 99 | return min(self.lines.items(), key=lambda x: x[1]["hit_count"])[0]
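100 | 101 | 102 | # Illustrative usage only; the stub plugin below is hypothetical and exists 103 | # just to supply the `info()["name"]` that `make_key` reads. 104 | if __name__ == "__main__": 105 | class StubPlugin: # pylint: disable=missing-class-docstring 106 | def info(self): 107 | return {"name": "stub"} 108 | 109 | cache = LFUCache(max_size=2, timeout=300) 110 | cache.store("debian", [StubPlugin()], ["listing"]) 111 | assert cache.read("debian", [StubPlugin()]) == ["listing"]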
88 | """ 89 | current_time = datetime.now() 90 | store_time = line["store_time"] 91 | 92 | return current_time - store_time < self.timeout 93 | 94 | def make_key(self, search_term: str, plugins: list) -> Tuple: 95 | names = [plugin.info()["name"] for plugin in plugins] 96 | return (search_term, frozenset(names)) 97 | 98 | def least_frequently_used(self): 99 | return min(self.lines.items(), key=lambda x: x[1]["hit_count"])[0] 100 | -------------------------------------------------------------------------------- /cleanbay/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/cleanbay/plugins/__init__.py -------------------------------------------------------------------------------- /cleanbay/plugins/eztv.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the eztv plugin""" 2 | 3 | from bs4 import BeautifulSoup, SoupStrainer 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self): 12 | return sync_get(self.info()["domain"], timeout=10).status_code == 200 13 | 14 | async def search(self, session, search_param): 15 | info = self.info() 16 | url = info["search_url"] + search_param 17 | resp = await session.get(url) 18 | 19 | strainer = SoupStrainer("table") 20 | resp = BeautifulSoup(await resp.text(), features="lxml", parse_only=strainer) 21 | 22 | table = resp.findChildren("table")[4] 23 | if len(table) == 0: 24 | return [] 25 | 26 | torrents = [] 27 | for row in table.findChildren("tr")[2:]: 28 | seeders = row.findChildren("td")[5].text 29 | if not seeders.isnumeric(): 30 | seeders = 0 31 | else: 32 | seeders = int(seeders) 33 | 34 | try: 35 | magnet = row.findChildren("td")[2].findChildren("a")[0]["href"] 36 | except IndexError: 37 | continue 38 | 39 | torrents.append( 40 | Torrent( 41 | row.findChildren("td")[1].text.strip(), 42 | magnet, 43 | seeders, 44 | -1, 45 | row.findChildren("td")[3].text, 46 | "eztv", 47 | row.findChildren("td")[4].text, 48 | ) 49 | ) 50 | return torrents 51 | 52 | def info(self): 53 | return { 54 | "name": "eztv", 55 | "category": Category.TV, 56 | "domain": "https://eztv.re", 57 | "search_url": "https://eztv.re/search/", 58 | } 59 | -------------------------------------------------------------------------------- /cleanbay/plugins/leetx.py.disabled: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup, SoupStrainer 2 | import asyncio # pylint: disable=unused-import 3 | 4 | from ..abstract_plugin import AbstractPlugin 5 | from ..torrent import Torrent, Category 6 | 7 | 8 | class CBPlugin(AbstractPlugin): 9 | def verify_status(self): 10 | return True 11 | 12 | async def search(self, session, search_param): 13 | info = self.info() 14 | domain, useragent = info['domain'], info['user-agent'] 15 | url = f'{domain}/search/{search_param}/1/' 16 | resp = await session.get(url, headers={'User-Agent': useragent}) 17 | 18 | strainer = SoupStrainer('table') 19 | resp = BeautifulSoup( 20 | await resp.text(), 21 | features='lxml', 22 | parse_only=strainer) 23 | 24 | table = resp.findChildren('table') 25 | if len(table) == 0: 26 | return [] 27 | 28 | torrents = [] 29 | for row in table[0].findChildren('tr')[1:]: 30 | 
59 | -------------------------------------------------------------------------------- /cleanbay/plugins/leetx.py.disabled: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup, SoupStrainer 2 | import asyncio # pylint: disable=unused-import 3 | 4 | from ..abstract_plugin import AbstractPlugin 5 | from ..torrent import Torrent, Category 6 | 7 | 8 | class CBPlugin(AbstractPlugin): 9 | def verify_status(self): 10 | return True 11 | 12 | async def search(self, session, search_param): 13 | info = self.info() 14 | domain, useragent = info['domain'], info['user-agent'] 15 | url = f'{domain}/search/{search_param}/1/' 16 | resp = await session.get(url, headers={'User-Agent': useragent}) 17 | 18 | strainer = SoupStrainer('table') 19 | resp = BeautifulSoup( 20 | await resp.text(), 21 | features='lxml', 22 | parse_only=strainer) 23 | 24 | table = resp.findChildren('table') 25 | if len(table) == 0: 26 | return [] 27 | 28 | torrents = [] 29 | for row in table[0].findChildren('tr')[1:]: 30 | torrents.append(Torrent( 31 | row.findChildren('td')[0].findChildren('a')[1].text, 32 | # TODO(gr3atwh173): create a function to get the magnet from this link 33 | row.findChildren('td')[0].findChildren('a')[1]['href'], 34 | int(row.findChildren('td')[1].text), 35 | int(row.findChildren('td')[2].text), 36 | row.findChildren('td')[4].text.split('B')[0] + 'B', 37 | row.findChildren('td')[5].text, 38 | row.findChildren('td')[3].text 39 | )) 40 | return torrents 41 | 42 | def info(self): 43 | return { 44 | 'name': 'leetx', 45 | 'category': Category.GENERAL, 46 | 'domain': 'https://1337x.to', 47 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' 48 | } 49 | -------------------------------------------------------------------------------- /cleanbay/plugins/libgen.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the libgen plugin""" 2 | 3 | from urllib.parse import quote as uri_quote 4 | import aiohttp 5 | 6 | import requests 7 | 8 | from bs4 import BeautifulSoup, SoupStrainer 9 | 10 | from ..torrent import Torrent, Category 11 | from ..abstract_plugin import AbstractPlugin 12 | 13 | 14 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 15 | def verify_status(self) -> bool: 16 | domain = self.info()["domain"] 17 | return requests.get(domain, timeout=10).status_code == 200 18 | 19 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 20 | domain = self.info()["domain"] 21 | search_param = uri_quote(search_param) 22 | res = await session.get(f"{domain}/search.php?req={search_param}") 23 | 24 | strainer = SoupStrainer("table") 25 | soup = BeautifulSoup(await res.text(), features="lxml", parse_only=strainer) 26 | 27 | table = soup.findChildren("table")[2] 28 | 29 | torrents = [] 30 | for row in table.findChildren("tr")[1:]: 31 | cols = row.findChildren("td") 32 | 33 | author = cols[1].text 34 | title = cols[2].text 35 | publisher = cols[3].text 36 | year = cols[4].text 37 | pages = cols[5].text 38 | language = cols[6].text 39 | size = cols[7].text 40 | download = cols[9].find("a")["href"] 41 | 42 | # construct the name 43 | name = [] 44 | if author: 45 | name.append(f"[{author}]") 46 | if title: 47 | name.append(title) 48 | name = " ".join(name) 49 | 50 | # construct additional info 51 | info = [] 52 | if publisher: 53 | info.append(publisher) 54 | if language: 55 | info.append(language) 56 | if year: 57 | info.append(year) 58 | if pages: 59 | info.append(f"{pages}p") 60 | info = ", ".join(info) 61 | 62 | if info: 63 | name += f" ({info})" 64 | 65 | torrents.append( 66 | Torrent(name, download, 1, -1, size.upper(), "libgen", year) 67 | ) 68 | 69 | return torrents 70 | 71 | def info(self) -> dict: 72 | return { 73 | "name": "libgen", 74 | "category": Category.BOOKS, 75 | "domain": "https://libgen.is", 76 | } 77 | -------------------------------------------------------------------------------- /cleanbay/plugins/linuxtracker.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the linuxtracker plugin""" 2 | 3 | from bs4 import BeautifulSoup 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def info(self): 12 | return { 13 | "name": "linuxtracker", 14 | "category": 
Category.SOFTWARE, 15 | "domain": "https://linuxtracker.org", 16 | } 17 | 18 | def verify_status(self): 19 | domain = self.info()["domain"] 20 | return sync_get(domain, timeout=10).status_code == 200 21 | 22 | async def search(self, session, search_param): 23 | domain = self.info()["domain"] 24 | search_url = "{}/index.php?page=torrents&search={}&category=0&active=1" 25 | 26 | resp = await session.get(search_url.format(domain, search_param)) 27 | soup = BeautifulSoup(await resp.text(), features="lxml") 28 | 29 | table = soup.find_all("table", {"class": "lista"})[4] 30 | if len(table) == 0: 31 | return [] 32 | 33 | torrents = [] 34 | for row in table.findChildren("tr")[1:]: 35 | try: 36 | name = row.find_all("td")[1].find_all()[2].text 37 | magnet = row.find_all("td")[1].find_all()[26].find_all("a")[1]["href"] 38 | date, size, seeders, leechers = self.extract_info( 39 | row.find_all("td")[1].find_all()[7].text.split("\n")[2:6] 40 | ) 41 | 42 | torrents.append( 43 | Torrent( 44 | name, 45 | magnet, 46 | int(seeders), 47 | int(leechers), 48 | size, 49 | "linuxtracker", 50 | date, 51 | ) 52 | ) 53 | except IndexError: 54 | pass 55 | 56 | return torrents 57 | 58 | def extract_info(self, raw_list): 59 | date = raw_list[0].split(":")[1].strip() 60 | size = raw_list[1].split(":")[1].strip() 61 | seeders = raw_list[2].strip().split(" ")[1] 62 | leechers = raw_list[3].strip().split(" ")[1] 63 | 64 | return (date, size, seeders, leechers) 65 | -------------------------------------------------------------------------------- /cleanbay/plugins/nyaa.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the nyaa plugin""" 2 | 3 | from bs4 import BeautifulSoup, SoupStrainer 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self): 12 | return sync_get(self.info()["domain"], timeout=10).status_code == 200 13 | 14 | async def search(self, session, search_param): 15 | info = self.info() 16 | url = info["search_url"] + search_param 17 | resp = await session.get(url) 18 | 19 | strainer = SoupStrainer("table") 20 | resp = BeautifulSoup(await resp.text(), features="lxml", parse_only=strainer) 21 | 22 | table = resp.findChildren("table")[0] 23 | 24 | if len(table) == 0: 25 | return [] 26 | 27 | torrents = [] 28 | for row in table.findChildren("tr")[1:]: 29 | row_children = row.findChildren("td") 30 | 31 | seeders = row_children[5].text 32 | if not seeders.isnumeric(): 33 | seeders = 0 34 | else: 35 | seeders = int(seeders) 36 | 37 | leechers = row_children[6].text 38 | if not leechers.isnumeric(): 39 | leechers = 0 40 | else: 41 | leechers = int(leechers) 42 | 43 | try: 44 | magnet = row_children[2].findChildren("a")[1]["href"] 45 | except IndexError: 46 | continue 47 | 48 | torrents.append( 49 | Torrent( 50 | row_children[1].text.strip(), 51 | magnet, 52 | seeders, 53 | leechers, 54 | row_children[3].text.replace("i", ""), 55 | "nyaa", 56 | row_children[4].text, 57 | ) 58 | ) 59 | return torrents 60 | 61 | def info(self): 62 | return { 63 | "name": "nyaa", 64 | "category": Category.TV, 65 | "domain": "https://nyaa.iss.ink/", 66 | "search_url": "https://nyaa.iss.ink/?f=0&c=0_0&q=", 67 | } 68 | -------------------------------------------------------------------------------- /cleanbay/plugins/piratebay.py: 
-------------------------------------------------------------------------------- 1 | """Contains the impl for the piratebay plugin""" 2 | 3 | from requests import get as get_sync 4 | from urllib.parse import quote as uri_quote 5 | from datetime import datetime, timezone 6 | 7 | import math 8 | 9 | from ..abstract_plugin import AbstractPlugin 10 | from ..torrent import Torrent, Category 11 | 12 | 13 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 14 | def info(self): 15 | return { 16 | "name": "piratebay", 17 | "category": Category.GENERAL, 18 | "domain": "https://apibay.org", 19 | # pylint: disable=line-too-long 20 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", 21 | } 22 | 23 | def verify_status(self): 24 | domain, useragent = self.info()["domain"], self.info()["user-agent"] 25 | return ( 26 | get_sync(domain, headers={"user-agent": useragent}, timeout=10).status_code 27 | != 500 28 | ) 29 | 30 | async def search(self, session, search_param): 31 | domain, useragent = self.info()["domain"], self.info()["user-agent"] 32 | 33 | resp = await session.get( 34 | f"{domain}/q.php?q={search_param}&cat=", headers={"user-agent": useragent} 35 | ) 36 | 37 | if resp.status != 200: 38 | return [] 39 | 40 | torrents = [] 41 | for element in await resp.json(): 42 | torrents.append( 43 | Torrent( 44 | element["name"], 45 | self.make_magnet(element["info_hash"], element["name"]), 46 | int(element["seeders"]), 47 | int(element["leechers"]), 48 | self.format_size(int(element["size"])), 49 | element["username"], 50 | self.format_date(int(element["added"])), 51 | ) 52 | ) 53 | return torrents 54 | 55 | def make_magnet(self, ih, name): 56 | return f"magnet:?xt=urn:btih:{ih}&dn={uri_quote(name)}&tr={self.trackers()}" 57 | 58 | def trackers(self): 59 | trackers = "&tr=".join( 60 | [ 61 | "udp://tracker.coppersurfer.tk:6969/announce", 62 | "udp://tracker.openbittorrent.com:6969/announce", 63 | "udp://9.rarbg.to:2710/announce", 64 | "udp://9.rarbg.me:2780/announce", 65 | "udp://9.rarbg.to:2730/announce", 66 | "udp://tracker.opentrackr.org:1337", 67 | "http://p4p.arenabg.com:1337/announce", 68 | "udp://tracker.torrent.eu.org:451/announce", 69 | "udp://tracker.tiny-vps.com:6969/announce", 70 | "udp://open.stealth.si:80/announce", 71 | ] 72 | ) 73 | return uri_quote(trackers) 74 | 75 | def format_size(self, size_bytes): 76 | if size_bytes == 0: 77 | return "0B" 78 | size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") 79 | i = int(math.floor(math.log(size_bytes, 1024))) 80 | p = math.pow(1024, i) 81 | s = round(size_bytes / p, 2) 82 | # return "%s %s" % (s, size_name[i]) 83 | return f"{s} {size_name[i]}" 84 | 85 | def format_date(self, epoch): 86 | return datetime.fromtimestamp(epoch, timezone.utc).strftime("%Y-%m-%d %H:%M:%S") 87 | -------------------------------------------------------------------------------- /cleanbay/plugins/yts.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the yts plugin""" 2 | 3 | from requests import get as get_sync 4 | from urllib.parse import quote as uri_quote 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self) -> bool: 12 | domain = self.info()["domain"] 13 | return get_sync(domain, timeout=10).status_code == 200 14 | 15 | def info(self) 
--------------------------------------------------------------------------------
1 | """Contains the impl for the yts plugin"""
2 | 
3 | from requests import get as get_sync
4 | from urllib.parse import quote as uri_quote
5 | 
6 | from ..abstract_plugin import AbstractPlugin
7 | from ..torrent import Torrent, Category
8 | 
9 | 
10 | class CBPlugin(AbstractPlugin):  # pylint: disable=missing-class-docstring
11 |     def verify_status(self) -> bool:
12 |         domain = self.info()["domain"]
13 |         return get_sync(domain, timeout=10).status_code == 200
14 | 
15 |     def info(self) -> dict:
16 |         return {
17 |             "name": "yts",
18 |             "category": Category.CINEMA,
19 |             "api_url": "https://yts.mx/api/v2/list_movies.json?query_term=",
20 |             "domain": "https://yts.mx",
21 |         }
22 | 
23 |     async def search(self, session, search_param):
24 |         api_url = self.info()["api_url"]
25 |         resp = await session.get(api_url + uri_quote(search_param))
26 |         resp = await resp.json()
27 | 
28 |         if resp["status"] != "ok" or resp["data"]["movie_count"] == 0:
29 |             return []
30 | 
31 |         torrents = []
32 |         for element in resp["data"]["movies"]:
33 |             max_seed_torrent = max(element["torrents"], key=lambda x: x["seeds"])
34 | 
35 |             title_long = element["title_long"]
36 |             slug = element["slug"]
37 |             quality = max_seed_torrent["quality"]
38 |             type_ = max_seed_torrent["type"]
39 |             info_hash = max_seed_torrent["hash"]
40 |             seeders = max_seed_torrent["seeds"]
41 |             leechers = max_seed_torrent["peers"]
42 |             size = max_seed_torrent["size"]
43 |             date_uploaded = max_seed_torrent["date_uploaded"]
44 | 
45 |             torrents.append(
46 |                 Torrent(
47 |                     f"{title_long} [{quality}] [{type_}]",
48 |                     self.make_magnet(slug, info_hash),
49 |                     int(seeders),
50 |                     int(leechers),
51 |                     size,
52 |                     "yts",
53 |                     date_uploaded,
54 |                 )
55 |             )
56 | 
57 |         return torrents
58 | 
59 |     def make_magnet(self, slug, ih):
60 |         return f"magnet:?xt=urn:btih:{ih}&dn={slug}&tr={self.trackers()}"
61 | 
62 |     def trackers(self):
63 |         trackers = "&tr=".join(
64 |             [
65 |                 "udp://open.demonii.com:1337/announce",
66 |                 "udp://tracker.openbittorrent.com:80",
67 |                 "udp://tracker.coppersurfer.tk:6969",
68 |                 "udp://glotorrents.pw:6969/announce",
69 |                 "udp://tracker.opentrackr.org:1337/announce",
70 |                 "udp://torrent.gresille.org:80/announce",
71 |                 "udp://p4p.arenabg.com:1337",
72 |                 "udp://tracker.leechers-paradise.org:6969",
73 |             ]
74 |         )
75 |         return uri_quote(trackers)
76 | 
--------------------------------------------------------------------------------
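For every movie, the YTS API returns several encodes under a "torrents" key, and search keeps only the best-seeded one via max() with a key function. An illustration with hypothetical data shaped like that array (other fields omitted):

# Hypothetical excerpt of one movie's "torrents" array.
encodes = [
    {"quality": "720p", "type": "web", "seeds": 12},
    {"quality": "1080p", "type": "bluray", "seeds": 87},
    {"quality": "2160p", "type": "web", "seeds": 3},
]

best = max(encodes, key=lambda enc: enc["seeds"])
print(best["quality"], best["type"])  # -> 1080p bluray

--------------------------------------------------------------------------------
/cleanbay/plugins_manager/__init__.py: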
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-module-docstring
2 | from .plugins_manager import PluginsManager, NoPluginsError
3 | 
--------------------------------------------------------------------------------
/cleanbay/plugins_manager/plugins_manager.py:
--------------------------------------------------------------------------------
1 | """Contains PluginsManager and NoPluginsError"""
2 | from importlib import import_module
3 | from os.path import isfile, basename
4 | import glob
5 | 
6 | 
7 | class NoPluginsError(Exception):
8 |     """Indicates that no usable plugins could be loaded."""
9 | 
10 |     pass
11 | 
12 | 
13 | class PluginsManager:
14 |     """Manages the loading and filtering of plugins.
15 | 
16 |     Attributes:
17 |         plugins (dict): Plugin objects hashed by their names
18 | 
19 |     """
20 | 
21 |     def __init__(self, directory: str):
22 |         """Loads the plugins.
23 | 
24 |         Args:
25 |             directory (str): The directory to load the plugins from.
26 | 
27 |         """
28 |         self.plugins = {}
29 | 
30 |         # import all the files ending with `.py` except __init__
31 |         modules = glob.glob(f"{directory}/*.py")
32 |         plugins = [
33 |             import_module(f"cleanbay.plugins.{basename(f)[:-3]}")
34 |             for f in modules
35 |             if isfile(f) and not f.endswith("__init__.py")
36 |         ]
37 | 
38 |         # filter out the unusable plugins
39 |         for plugin in plugins:
40 |             try:
41 |                 plugin = plugin.CBPlugin()
42 |                 info = plugin.info()
43 | 
44 |                 if not plugin.verify_status():
45 |                     continue
46 |                 if ("name" not in info) or ("category" not in info):
47 |                     continue
48 | 
49 |                 self.plugins[info["name"]] = plugin
50 |             except TypeError:
51 |                 # TODO(gr3atwh173): add logging
52 |                 pass
53 |             except:  # pylint: disable=bare-except
54 |                 pass
55 | 
56 |     def filter_plugins(
57 |         self,
58 |         include_categories: list,
59 |         exclude_categories: list,
60 |         include_sites: list,
61 |         exclude_sites: list,
62 |     ) -> list:
63 |         """Filters the plugins based on the passed arguments.
64 | 
65 |         Individual plugins are given more preference than categories. If a plugin
66 |         was excluded in the category filtering phase, it may be added back if it
67 |         was passed in the `include_sites` list.
68 | 
69 |         Args:
70 |             include_categories (list): Categories of plugins to search
71 |             exclude_categories (list): Categories of plugins to not search
72 |             include_sites (list): Names of services to search
73 |             exclude_sites (list): Names of services to not search
74 | 
75 |         Returns:
76 |             A list of filtered plugin objects.
77 | 
78 |         Raises:
79 |             NoPluginsError: if there are no usable plugins
80 | 
81 |         """
82 |         if not self.plugins:
83 |             raise NoPluginsError()
84 | 
85 |         filtered_plugins = set(self.plugins.values())
86 | 
87 |         # categories
88 |         if include_categories:
89 |             filtered_plugins = set()
90 |             for plugin in self.plugins.values():
91 |                 cat = plugin.info()["category"]
92 |                 if cat in include_categories:
93 |                     filtered_plugins.add(plugin)
94 | 
95 |         elif exclude_categories:
96 |             for plugin in self.plugins.values():
97 |                 cat = plugin.info()["category"]
98 |                 if cat in exclude_categories:
99 |                     filtered_plugins.remove(plugin)
100 | 
101 |         # sites
102 |         if include_sites:
103 |             filtered_plugins = set()
104 |             for site, plugin in self.plugins.items():
105 |                 if site in include_sites:
106 |                     filtered_plugins.add(plugin)
107 | 
108 |         elif exclude_sites:
109 |             for site, plugin in self.plugins.items():
110 |                 if site in exclude_sites and plugin in filtered_plugins:
111 |                     filtered_plugins.remove(plugin)
112 | 
113 |         return list(filtered_plugins)
114 | 
--------------------------------------------------------------------------------
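filter_plugins applies the category filters first and the site filters second, and an include list resets the working set outright, which is how a plugin excluded by category can be brought back by naming its site. A sketch of a standalone call; the "cleanbay/plugins" path is an assumption about the working directory, and note that constructing the manager hits the network, since verify_status() runs for every plugin during loading:

from cleanbay.plugins_manager import PluginsManager
from cleanbay.torrent import Category

# Loading imports every plugin module and probes its tracker.
manager = PluginsManager("cleanbay/plugins")

# Software trackers only, minus linuxtracker.
plugins = manager.filter_plugins(
    include_categories=[Category.SOFTWARE],
    exclude_categories=[],
    include_sites=[],
    exclude_sites=["linuxtracker"],
)
print([plugin.info()["name"] for plugin in plugins])

--------------------------------------------------------------------------------
/cleanbay/torrent.py: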
--------------------------------------------------------------------------------
1 | """contains the `Torrent` data class and the `Category` enum"""
2 | from dataclasses import dataclass
3 | from enum import Enum
4 | 
5 | 
6 | class Category(Enum):
7 |     """Represents the category of a plugin.
8 | 
9 |     Variants:
10 |         ALL: Everything under the sun
11 |         GENERAL: Plugins that track everything
12 |         CINEMA: Plugins that track movies
13 |         TV: Plugins that track shows on TV, OTT or anything that's not a movie
14 |         SOFTWARE: Plugins that track software excluding games
15 |         BOOKS: Plugins that index books or audiobooks
16 | 
17 |     """
18 | 
19 |     ALL = 0
20 |     GENERAL = 1
21 |     CINEMA = 2
22 |     TV = 3
23 |     SOFTWARE = 4
24 |     BOOKS = 5
25 | 
26 | 
27 | @dataclass
28 | class Torrent:
29 |     """Represents a torrent listing.
30 | 
31 |     Attributes:
32 |         name (str): Name/title of the torrent
33 |         magnet (str): Magnet URL of the torrent
34 |         seeders (int): Number of seeders. -1 if not listed
35 |         leechers (int): Number of leechers. -1 if not listed
36 |         size (str): Size in the format "<quantity> <unit>"
37 |         uploader (str): Username of the uploader
38 |         uploaded_at (str): Upload date or time since upload
39 | 
40 |     """
41 | 
42 |     name: str
43 |     magnet: str
44 |     seeders: int
45 |     leechers: int
46 |     size: str
47 |     uploader: str
48 |     uploaded_at: str
49 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "cleanbay"
3 | version = "0.2.0.beta"
4 | description = "A metasearch engine for torrents"
5 | authors = ["Gr3atWh173"]
6 | license = "MIT"
7 | 
8 | [tool.poetry.dependencies]
9 | python = "^3.11.4"
10 | beautifulsoup4 = "^4.12.3"
11 | lxml = "^5.1.0"
12 | requests = "^2.31.0"
13 | aiohttp = "^3.9.4"
14 | uvicorn = {extras = ["standard"], version = "^0.27.0.post1"}
15 | fastapi = "^0.109.2"
16 | gunicorn = "^23.0.0"
17 | python-dotenv = "^1.0.1"
18 | slowapi = "^0.1.9"
19 | pytest = "^8.0.0"
20 | certifi = "^2024.2.2"
21 | httpx = "^0.26.0"
22 | pydantic-settings = "^2.2.0"
23 | 
24 | [tool.poetry.dev-dependencies]
25 | pylint = "^2.11.1"
26 | 
27 | [tool.poetry.group.dev.dependencies]
28 | pre-commit = "^3.6.2"
29 | 
30 | [build-system]
31 | requires = ["poetry-core>=1.0.0"]
32 | build-backend = "poetry.core.masonry.api"
33 | 
--------------------------------------------------------------------------------
/test_app.py:
--------------------------------------------------------------------------------
1 | """Integration tests for the app"""
2 | 
3 | import re
4 | from os import getenv
5 | from time import sleep
6 | 
7 | from fastapi.testclient import TestClient
8 | 
9 | from dotenv import load_dotenv
10 | 
11 | from app.main import app
12 | 
13 | 
14 | load_dotenv()
15 | cache_timeout = int(getenv("CACHE_TIMEOUT", "300"))
16 | 
17 | client = TestClient(app)
18 | 
19 | 
20 | def test_status():
21 |     response = client.get("/api/v1/status")
22 |     assert response.status_code == 200
23 |     assert response.json()["status"] == "ok"
24 | 
25 | 
26 | def test_empty_search():
27 |     response = client.post(
28 |         "/api/v1/search",
29 |         json={
30 |             "search_term": "",
31 |         },
32 |     )
33 | 
34 |     assert response.status_code == 422
35 | 
36 | 
37 | def test_simple_search():
38 |     response = client.post(
39 |         "/api/v1/search",
40 |         json={
41 |             "search_term": "star wars",
42 |         },
43 |     )
44 | 
45 |     assert response.status_code == 200
46 |     assert response.json()["length"] > 0
47 | 
48 |     for listing in response.json()["data"]:
49 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
50 |             listing["magnet"]
51 |         )
52 | 
53 | 
54 | def test_include_categories():
55 |     response = client.post(
56 |         "/api/v1/search",
57 |         json={
58 |             "search_term": "kali",
59 |             "include_categories": ["software"],
60 |         },
61 |     )
62 | 
63 |     assert response.status_code == 200
64 |     assert response.json()["length"] > 0
65 | 
66 |     for listing in response.json()["data"]:
67 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
68 |             listing["magnet"]
69 |         )
70 |         assert listing["uploader"] == "linuxtracker"
71 | 
72 | 
73 | def test_exclude_categories():
74 |     response = client.post(
75 |         "/api/v1/search",
76 |         json={
77 |             "search_term": "alpine",
78 |             "exclude_categories": ["software"],
79 |         },
80 |     )
81 | 
82 |     assert response.status_code == 200
83 |     assert response.json()["length"] > 0
84 | 
85 |     for listing in response.json()["data"]:
86 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
87 |             listing["magnet"]
88 |         )
89 |         assert listing["uploader"] != "linuxtracker"
90 | 
91 | 
92 | def test_include_exclude_categories():
93 |     response = client.post(
94 |         "/api/v1/search",
95 |         json={
96 |             "search_term": "alpine",
97 |             "include_categories": ["software"],
98 |             "exclude_categories": ["cinema"],
99 |         },
100 |     )
101 | 
102 |     assert response.status_code == 422
103 | 
104 | 
105 | def test_include_sites():
106 |     response = client.post(
107 |         "/api/v1/search",
108 |         json={
109 |             "search_term": "kali",
110 |             "include_sites": ["linuxtracker"],
111 |         },
112 |     )
113 | 
114 |     assert response.status_code == 200
115 |     assert response.json()["length"] > 0
116 | 
117 |     for listing in response.json()["data"]:
118 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
119 |             listing["magnet"]
120 |         )
121 |         assert listing["uploader"] == "linuxtracker"
122 | 
123 | 
124 | def test_exclude_sites():
125 |     response = client.post(
126 |         "/api/v1/search",
127 |         json={
128 |             "search_term": "alpine",
129 |             "exclude_sites": ["linuxtracker"],
130 |         },
131 |     )
132 | 
133 |     assert response.status_code == 200
134 |     assert response.json()["length"] > 0
135 | 
136 |     for listing in response.json()["data"]:
137 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
138 |             listing["magnet"]
139 |         )
140 |         assert listing["uploader"] != "linuxtracker"
141 | 
142 | 
143 | def test_include_exclude_sites():
144 |     response = client.post(
145 |         "/api/v1/search",
146 |         json={
147 |             "search_term": "kali",
148 |             "include_sites": ["linuxtracker"],
149 |             "exclude_sites": ["yts"],
150 |         },
151 |     )
152 | 
153 |     assert response.status_code == 422
154 | 
155 | 
156 | def test_advanced_search():
157 |     response = client.post(
158 |         "/api/v1/search",
159 |         json={
160 |             "search_term": "alpine",
161 |             "include_categories": ["software"],
162 |             "exclude_sites": ["eztv", "piratebay"],
163 |         },
164 |     )
165 | 
166 |     assert response.status_code == 200
167 |     assert response.json()["length"] > 0
168 | 
169 |     for listing in response.json()["data"]:
170 |         assert listing["uploader"] not in ["eztv", "piratebay"]
171 | 
172 | 
173 | def test_cache():
174 |     response_first = client.post(
175 |         "/api/v1/search",
176 |         json={
177 |             "search_term": "dune",
178 |             "include_sites": ["yts"],
179 |         },
180 |     )
181 | 
182 |     response_second = client.post(
183 |         "/api/v1/search",
184 |         json={
185 |             "search_term": "dune",
186 |             "include_sites": ["yts"],
187 |         },
188 |     )
189 | 
190 |     assert response_first.json()["cache_hit"] is False
191 |     assert response_second.json()["cache_hit"] is True
192 | 
193 | 
194 | def test_cache_timeout():
195 |     response_first = client.post(
196 |         "/api/v1/search",
197 |         json={
198 |             "search_term": "godfather",
199 |             "include_categories": [],
200 |             "exclude_categories": [],
201 |             "include_sites": ["yts"],
202 |             "exclude_sites": [],
203 |         },
204 |     )
205 | 
206 |     response_second = client.post(
207 |         "/api/v1/search",
208 |         json={
209 |             "search_term": "godfather",
210 |             "include_categories": [],
211 |             "exclude_categories": [],
212 |             "include_sites": ["yts"],
213 |             "exclude_sites": [],
214 |         },
215 |     )
216 | 
217 |     assert response_first.json()["cache_hit"] is False
218 |     assert response_second.json()["cache_hit"] is True
219 | 
220 |     sleep(cache_timeout)
221 | 
222 |     response_third = client.post(
223 |         "/api/v1/search",
224 |         json={
225 |             "search_term": "godfather",
226 |             "include_categories": [],
227 |             "exclude_categories": [],
228 |             "include_sites": ["yts"],
229 |             "exclude_sites": [],
230 |         },
231 |     )
232 | 
233 |     response_fourth = client.post(
234 |         "/api/v1/search",
235 |         json={
236 |             "search_term": "godfather",
237 |             "include_categories": [],
238 |             "exclude_categories": [],
239 |             "include_sites": ["yts"],
240 |             "exclude_sites": [],
241 |         },
242 |     )
243 | 
244 |     assert response_third.json()["cache_hit"] is False
245 |     assert response_fourth.json()["cache_hit"] is True
246 | 
247 | 
248 | # ================ utility functions =====================
249 | 
250 | 
251 | def is_valid_url(url: str) -> bool:
252 |     regex = re.compile(
253 |         r"^(?:http|ftp)s?://"  # http:// or https://
254 |         # pylint:disable=line-too-long
255 |         r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
256 |         r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
257 |         r"(?::\d+)?"  # optional port
258 |         r"(?:/?|[/?]\S+)$",
259 |         re.IGNORECASE,
260 |     )
261 |     return re.match(regex, url) is not None
262 | 
--------------------------------------------------------------------------------
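The tests above double as a specification of the HTTP API: POST /api/v1/search takes a search_term plus optional include/exclude lists, and the JSON response carries length, data, and cache_hit fields. A sketch of calling a locally running instance; the host and port are assumptions, and any HTTP client works (httpx is already a dependency):

import httpx

# Assumes the app is being served locally, e.g. via uvicorn.
response = httpx.post(
    "http://127.0.0.1:8000/api/v1/search",
    json={
        "search_term": "alpine",
        "include_categories": ["software"],
    },
    timeout=30,
)

body = response.json()
print(body["length"], "results, cache hit:", body["cache_hit"])
for listing in body["data"]:
    print(listing["uploader"], listing["name"], listing["magnet"][:60])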