├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── LICENSE.txt ├── Procfile ├── README.md ├── app ├── __init__.py ├── helpers.py ├── main.py ├── schemas.py └── settings.py ├── cleanbay ├── __init__.py ├── abstract_plugin.py ├── backend.py ├── cache_manager │ ├── __init__.py │ ├── abstract_cache_manager.py │ └── lfu_cache.py ├── plugins │ ├── __init__.py │ ├── eztv.py │ ├── leetx.py.disabled │ ├── libgen.py │ ├── linuxtracker.py │ ├── nyaa.py │ ├── piratebay.py │ └── yts.py ├── plugins_manager │ ├── __init__.py │ └── plugins_manager.py └── torrent.py ├── poetry.lock ├── pyproject.toml └── test_app.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .env 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .nox/ 36 | .coverage 37 | .coverage.* 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | *.cover 42 | *.py,cover 43 | .hypothesis/ 44 | .pytest_cache/ 45 | cover/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Flask stuff: 52 | instance/ 53 | .webassets-cache 54 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.3.0 4 | hooks: 5 | - id: black 6 | language_version: python3.11 7 | - repo: local 8 | hooks: 9 | - id: pylint 10 | name: pylint 11 | entry: pylint 12 | language: system 13 | types: [python] 14 | args: 15 | [ 16 | "-rn", # Only display messages 17 | "-sn", # Don't display the score 18 | ] -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # This Pylint rcfile contains a best-effort configuration to uphold the 2 | # best-practices and style described in the Google Python style guide: 3 | # https://google.github.io/styleguide/pyguide.html 4 | # 5 | # Its canonical open-source location is: 6 | # https://google.github.io/styleguide/pylintrc 7 | 8 | [MAIN] 9 | 10 | # Files or directories to be skipped. They should be base names, not paths. 11 | ignore=third_party 12 | 13 | # Files or directories matching the regex patterns are skipped. The regex 14 | # matches against base names, not paths. 15 | ignore-patterns= 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=no 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | # Use multiple processes to speed up Pylint. 25 | jobs=4 26 | 27 | # Allow loading of arbitrary C extensions. Extensions are imported into the 28 | # active Python interpreter and may run arbitrary code. 29 | unsafe-load-any-extension=no 30 | 31 | 32 | [MESSAGES CONTROL] 33 | 34 | # Only show warnings with the listed confidence levels. Leave empty to show 35 | # all. 
Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 36 | confidence= 37 | 38 | # Enable the message, report, category or checker with the given id(s). You can 39 | # either give multiple identifiers separated by comma (,) or put this option 40 | # multiple times (only on the command line, not in the configuration file where 41 | # it should appear only once). See also the "--disable" option for examples. 42 | #enable= 43 | 44 | # Disable the message, report, category or checker with the given id(s). You 45 | # can either give multiple identifiers separated by comma (,) or put this 46 | # option multiple times (only on the command line, not in the configuration 47 | # file where it should appear only once). You can also use "--disable=all" to 48 | # disable everything first and then reenable specific checks. For example, if 49 | # you want to run only the similarities checker, you can use "--disable=all 50 | # --enable=similarities". If you want to run only the classes checker, but have 51 | # no Warning level messages displayed, use "--disable=all --enable=classes 52 | # --disable=W" 53 | disable=R, 54 | abstract-method, 55 | apply-builtin, 56 | arguments-differ, 57 | attribute-defined-outside-init, 58 | backtick, 59 | bad-option-value, 60 | basestring-builtin, 61 | buffer-builtin, 62 | c-extension-no-member, 63 | consider-using-enumerate, 64 | cmp-builtin, 65 | cmp-method, 66 | coerce-builtin, 67 | coerce-method, 68 | delslice-method, 69 | div-method, 70 | eq-without-hash, 71 | execfile-builtin, 72 | file-builtin, 73 | filter-builtin-not-iterating, 74 | fixme, 75 | getslice-method, 76 | global-statement, 77 | hex-method, 78 | idiv-method, 79 | implicit-str-concat, 80 | import-error, 81 | import-self, 82 | import-star-module-level, 83 | input-builtin, 84 | intern-builtin, 85 | invalid-str-codec, 86 | locally-disabled, 87 | long-builtin, 88 | long-suffix, 89 | map-builtin-not-iterating, 90 | misplaced-comparison-constant, 91 | missing-function-docstring, 92 | metaclass-assignment, 93 | next-method-called, 94 | next-method-defined, 95 | no-absolute-import, 96 | no-init, # added 97 | no-member, 98 | no-name-in-module, 99 | no-self-use, 100 | nonzero-method, 101 | oct-method, 102 | old-division, 103 | old-ne-operator, 104 | old-octal-literal, 105 | old-raise-syntax, 106 | parameter-unpacking, 107 | print-statement, 108 | raising-string, 109 | range-builtin-not-iterating, 110 | raw_input-builtin, 111 | rdiv-method, 112 | reduce-builtin, 113 | relative-import, 114 | reload-builtin, 115 | round-builtin, 116 | setslice-method, 117 | signature-differs, 118 | standarderror-builtin, 119 | suppressed-message, 120 | sys-max-int, 121 | trailing-newlines, 122 | unichr-builtin, 123 | unicode-builtin, 124 | unnecessary-pass, 125 | unpacking-in-except, 126 | useless-else-on-loop, 127 | useless-suppression, 128 | using-cmp-argument, 129 | wrong-import-order, 130 | xrange-builtin, 131 | zip-builtin-not-iterating, 132 | 133 | 134 | [REPORTS] 135 | 136 | # Set the output format. Available formats are text, parseable, colorized, msvs 137 | # (visual studio) and html. You can also give a reporter class, e.g. 138 | # mypackage.mymodule.MyReporterClass. 139 | output-format=text 140 | 141 | # Tells whether to display a full report or only the messages 142 | reports=no 143 | 144 | # Python expression which should return a note less than 10 (10 is the highest 145 | # note). 
You have access to the variables 'error', 'warning', 'refactor', 'convention' 146 | # and 'statement', which respectively contain the number of messages in each 147 | # category and the total number of statements analyzed. This is used by the 148 | # global evaluation report (RP0004). 149 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 150 | 151 | # Template used to display messages. This is a python new-style format string 152 | # used to format the message information. See doc for all details 153 | #msg-template= 154 | 155 | 156 | [BASIC] 157 | 158 | # Good variable names which should always be accepted, separated by a comma 159 | good-names=main,_ 160 | 161 | # Bad variable names which should always be refused, separated by a comma 162 | bad-names= 163 | 164 | # Colon-delimited sets of names that determine each other's naming style when 165 | # the name regexes allow several styles. 166 | name-group= 167 | 168 | # Include a hint for the correct naming format with invalid-name 169 | include-naming-hint=no 170 | 171 | # List of decorators that produce properties, such as abc.abstractproperty. Add 172 | # to this list to register other decorators that produce valid properties. 173 | property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl 174 | 175 | # Regular expression matching correct function names 176 | function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$ 177 | 178 | # Regular expression matching correct variable names 179 | variable-rgx=^[a-z][a-z0-9_]*$ 180 | 181 | # Regular expression matching correct constant names 182 | const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 183 | 184 | # Regular expression matching correct attribute names 185 | attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ 186 | 187 | # Regular expression matching correct argument names 188 | argument-rgx=^[a-z][a-z0-9_]*$ 189 | 190 | # Regular expression matching correct class attribute names 191 | class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ 192 | 193 | # Regular expression matching correct inline iteration names 194 | inlinevar-rgx=^[a-z][a-z0-9_]*$ 195 | 196 | # Regular expression matching correct class names 197 | class-rgx=^_?[A-Z][a-zA-Z0-9]*$ 198 | 199 | # Regular expression matching correct module names 200 | module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ 201 | 202 | # Regular expression matching correct method names 203 | method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$ 204 | 205 | # Regular expression which should only match function or class names that do 206 | # not require a docstring. 207 | no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ 208 | 209 | # Minimum line length for functions/classes that require docstrings, shorter 210 | # ones are exempt. 211 | docstring-min-length=12 212 | 213 | 214 | [TYPECHECK] 215 | 216 | # List of decorators that produce context managers, such as 217 | # contextlib.contextmanager. Add to this list to register other decorators that 218 | # produce valid context managers. 
219 | contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager 220 | 221 | # List of module names for which member attributes should not be checked 222 | # (useful for modules/projects where namespaces are manipulated during runtime 223 | # and thus existing member attributes cannot be deduced by static analysis). It 224 | # supports qualified module names, as well as Unix pattern matching. 225 | ignored-modules= 226 | 227 | # List of class names for which member attributes should not be checked (useful 228 | # for classes with dynamically set attributes). This supports the use of 229 | # qualified names. 230 | ignored-classes=optparse.Values,thread._local,_thread._local 231 | 232 | # List of members which are set dynamically and missed by pylint inference 233 | # system, and so shouldn't trigger E1101 when accessed. Python regular 234 | # expressions are accepted. 235 | generated-members= 236 | 237 | 238 | [FORMAT] 239 | 240 | # Maximum number of characters on a single line. 241 | max-line-length=88 242 | 243 | # TODO(https://github.com/pylint-dev/pylint/issues/3352): Direct pylint to exempt 244 | # lines made too long by directives to pytype. 245 | 246 | # Regexp for a line that is allowed to be longer than the limit. 247 | ignore-long-lines=(?x)( 248 | ^\s*(\#\ )?<?https?://\S+>?$| 249 | ^\s*(from\s+\S+\s+)?import\s+.+$) 250 | 251 | # Allow the body of an if to be on the same line as the test if there is no 252 | # else. 253 | single-line-if-stmt=yes 254 | 255 | # Maximum number of lines in a module 256 | max-module-lines=99999 257 | 258 | # String used as indentation unit. The internal Google style guide mandates 2 259 | # spaces. Google's externally-published style guide says 4, consistent with 260 | # PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google 261 | # projects (like TensorFlow). 262 | indent-string=' ' 263 | 264 | # Number of spaces of indent required inside a hanging or continued line. 265 | indent-after-paren=4 266 | 267 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 268 | expected-line-ending-format= 269 | 270 | 271 | [MISCELLANEOUS] 272 | 273 | # List of note tags to take in consideration, separated by a comma. 274 | notes=TODO 275 | 276 | 277 | [STRING] 278 | 279 | # This flag controls whether inconsistent-quotes generates a warning when the 280 | # character used as a quote delimiter is used inconsistently within a module. 281 | check-quote-consistency=yes 282 | 283 | 284 | [VARIABLES] 285 | 286 | # Tells whether we should check for unused import in __init__ files. 287 | init-import=no 288 | 289 | # A regular expression matching the name of dummy variables (i.e. expectedly 290 | # not used). 291 | dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) 292 | 293 | # List of additional names supposed to be defined in builtins. Remember that 294 | # you should avoid defining new builtins when possible. 295 | additional-builtins= 296 | 297 | # List of strings which can identify a callback function by name. A callback 298 | # name must start or end with one of those strings. 299 | callbacks=cb_,_cb 300 | 301 | # List of qualified module names which can have objects that can redefine 302 | # builtins. 
303 | redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools 304 | 305 | 306 | [LOGGING] 307 | 308 | # Logging modules to check that the string format arguments are in logging 309 | # function parameter format 310 | logging-modules=logging,absl.logging,tensorflow.io.logging 311 | 312 | 313 | [SIMILARITIES] 314 | 315 | # Minimum number of lines of a similarity. 316 | min-similarity-lines=4 317 | 318 | # Ignore comments when computing similarities. 319 | ignore-comments=yes 320 | 321 | # Ignore docstrings when computing similarities. 322 | ignore-docstrings=yes 323 | 324 | # Ignore imports when computing similarities. 325 | ignore-imports=no 326 | 327 | 328 | [SPELLING] 329 | 330 | # Spelling dictionary name. Available dictionaries: none. To make it work, 331 | # install the python-enchant package. 332 | spelling-dict= 333 | 334 | # List of comma separated words that should not be checked. 335 | spelling-ignore-words= 336 | 337 | # A path to a file that contains private dictionary; one word per line. 338 | spelling-private-dict-file= 339 | 340 | # Tells whether to store unknown words to indicated private dictionary in 341 | # --spelling-private-dict-file option instead of raising a message. 342 | spelling-store-unknown-words=no 343 | 344 | 345 | [IMPORTS] 346 | 347 | # Deprecated modules which should not be used, separated by a comma 348 | deprecated-modules=regsub, 349 | TERMIOS, 350 | Bastion, 351 | rexec, 352 | sets 353 | 354 | # Create a graph of all (i.e. internal and external) dependencies in the 355 | # given file (report RP0402 must not be disabled) 356 | import-graph= 357 | 358 | # Create a graph of external dependencies in the given file (report RP0402 must 359 | # not be disabled) 360 | ext-import-graph= 361 | 362 | # Create a graph of internal dependencies in the given file (report RP0402 must 363 | # not be disabled) 364 | int-import-graph= 365 | 366 | # Force import order to recognize a module as part of the standard 367 | # compatibility libraries. 368 | known-standard-library= 369 | 370 | # Force import order to recognize a module as part of a third party library. 371 | known-third-party=enchant, absl 372 | 373 | # Analyse import fallback blocks. This can be used to support both Python 2 and 374 | # 3 compatible code, which means that the block might have code that exists 375 | # only in one or another interpreter, leading to false positives when analysed. 376 | analyse-fallback-blocks=no 377 | 378 | 379 | [CLASSES] 380 | 381 | # List of method names used to declare (i.e. assign) instance attributes. 382 | defining-attr-methods=__init__, 383 | __new__, 384 | setUp 385 | 386 | # List of member names, which should be excluded from the protected access 387 | # warning. 388 | exclude-protected=_asdict, 389 | _fields, 390 | _replace, 391 | _source, 392 | _make 393 | 394 | # List of valid names for the first argument in a class method. 395 | valid-classmethod-first-arg=cls, 396 | class_ 397 | 398 | # List of valid names for the first argument in a metaclass class method. 
399 | valid-metaclass-classmethod-first-arg=mcs 400 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gr3atWh173 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -w 3 -k uvicorn.workers.UvicornWorker app.main:app -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cleanbay 2 | 3 | A metasearch engine for torrents 4 | 5 | **Note**: The frontend code lives in 6 | [this repo](https://github.com/gr3atwh173/cleanbay-frontend) 7 | 8 | 0. [Supported trackers](#supported-trackers) 9 | 1. [Live instances](#live-instances) 10 | 2. [Setup](#setup) 11 | 3. [API endpoints](#api-endpoints) 12 | 4. [Contributing](#contributing) 13 | 14 | ## Supported trackers 15 | 16 | Currently supported trackers are: 17 | 18 | 1. Piratebay 19 | 2. YTS 20 | 3. EZTV 21 | 4. LinuxTracker 22 | 5. Libgen 23 | 6. Nyaa 24 | 25 | ## Live instances 26 | 27 | You can find a running instance at: 28 | 29 | 1. https://testbay.onrender.com or, 30 | 2. https://cleanbay.netlify.app if you prefer a frontend 31 | 32 | ## Setup 33 | 34 | 1. Clone this repo 35 | 36 | ``` 37 | git clone https://github.com/gr3atwh173/cleanbay.git 38 | ``` 39 | 40 | 2. Install with [Poetry](https://pypi.org/project/poetry/) 41 | 42 | ``` 43 | cd cleanbay 44 | poetry install 45 | ``` 46 | 47 | _Optional_: Create a `.env` file with the following parameters in the project 48 | root: 49 | 50 | ``` 51 | # directory where the plugins are located 52 | # must have an __init__.py file 53 | PLUGINS_DIRECTORY="./cleanbay/plugins" 54 | 55 | # rate limiting by IP 56 | RATE_LIMIT="100/minute" 57 | 58 | # cache size in 'entries' 59 | CACHE_SIZE=128 60 | 61 | # time (in seconds) before a cache item is invalidated 62 | CACHE_TIMEOUT=300 63 | 64 | # domain allowed to make cross-origin requests to the server 65 | # '*' allows for any domain to request data 66 | ALLOWED_ORIGIN="*" 67 | ``` 68 | 
69 | 3. Run the web API 70 | 71 | ``` 72 | poetry run uvicorn app.main:app 73 | ``` 74 | 75 | ## API endpoints 76 | 77 | >**NOTE**: See the [auto-generated swagger docs](https://testbay.onrender.com/docs) for more up-to-date documentation 78 | 79 | 1. `POST /api/v1/search/` expects 80 | 81 | ```json 82 | { 83 | "search_term": "...", 84 | "include_categories": ["cinema", "tv"], 85 | "exclude_categories": [], 86 | "include_sites": ["linuxtracker", "piratebay"], 87 | "exclude_sites": [] 88 | } 89 | ``` 90 | 91 | and returns JSON with the following structure: 92 | 93 | ```json 94 | { 95 | "status": "ok", 96 | "length": 123, 97 | "cache_hit": true, 98 | "elapsed": 2.324, 99 | "data": [ 100 | { 101 | "name": "...", 102 | "magnet": "...", 103 | "seeders": 12345, 104 | "leechers": 1234, 105 | "size": "...", 106 | "uploader": "...", 107 | "uploaded_at": "..." 108 | } 109 | ] 110 | } 111 | ``` 112 | 113 | in case of an error, the following is returned: 114 | 115 | ```json 116 | { 117 | "status": "error", 118 | "msg": "why it happened" 119 | } 120 | ``` 121 | 122 | --- 123 | 124 | **NOTE** 125 | 126 | Categories are mapped like so: 127 | 128 | ``` 129 | "all" or "*" => ALL: Everything under the sun 130 | "general" => GENERAL: Plugins that track everything 131 | "cinema" => CINEMA: Plugins that track movies 132 | "tv" => TV: Plugins that track shows on TV, OTT or anything that's not a movie 133 | "software" => SOFTWARE: Plugins that track software excluding games 134 | "books" => BOOKS: Plugins that index books or audiobooks 135 | ``` 136 | 137 | --- 138 | 139 | 2. `GET /api/v1/status` returns JSON with the following structure 140 | 141 | ```json 142 | { 143 | "status": "ok", // or "not ok" 144 | "plugins": ["loaded", "plugins"] 145 | } 146 | ``` 147 | 148 | ## Contributing 149 | 150 | ### How you can contribute 151 | 152 | This is a non-exhaustive list: 153 | 154 | 1. Make a plugin (or two, or three, or four...) 155 | 2. Add new features to the backend, or make existing ones better! 156 | 3. Make a better frontend. 157 | 4. Write better documentation for the API. 158 | 5. Bug fixes, refactors, etc. 159 | 6. Suggest a feature. 160 | 161 | In any case, thanks for contributing! 162 | 163 | ### How to contribute 164 | 165 | Before making a change, please first discuss it by raising an issue. 166 | 167 | 1. Fork and clone the repo 168 | 2. Run `poetry install` to install the dependencies 169 | 3. Create a branch for your PR with `git checkout -b your-branch-name` 170 | 4. Code your changes 171 | 5. Push the changes to your fork 172 | 6. Make a pull request!
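173 | 174 | ### Running the tests 175 | 176 | For a quick sanity check before opening a PR, the suite in `test_app.py` can be run through Poetry (this assumes the test dependencies were installed by `poetry install`): 177 | 178 | ``` 179 | poetry run pytest 180 | ```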
181 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/app/__init__.py -------------------------------------------------------------------------------- /app/helpers.py: -------------------------------------------------------------------------------- 1 | """Contains helper functions for the API""" 2 | 3 | from typing import Tuple 4 | 5 | from cleanbay.torrent import Category 6 | 7 | from app.schemas import SearchIn, CATEGORY_MAP 8 | 9 | 10 | def parse_search_query(sq: SearchIn) -> Tuple: 11 | s_term = sq.search_term 12 | 13 | # if there's 'all' in the include category list, treat it as if the list was 14 | # empty, i.e., include everything 15 | i_cats = [] 16 | if any(x in sq.include_categories for x in ["all", "*"]): 17 | i_cats = list(CATEGORY_MAP.values()) 18 | i_cats.remove(Category.ALL) 19 | else: 20 | i_cats = [CATEGORY_MAP[cat] for cat in sq.include_categories] 21 | 22 | e_cats = [CATEGORY_MAP[cat] for cat in sq.exclude_categories] 23 | 24 | i_sites = sq.include_sites 25 | e_sites = sq.exclude_sites 26 | 27 | return (s_term, i_cats, e_cats, i_sites, e_sites)
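28 | 29 | 30 | # Illustrative sketch only (not used by the app): shows how `parse_search_query` 31 | # flattens a `SearchIn` into the positional arguments `Backend.search` expects. 32 | if __name__ == "__main__": 33 | query = SearchIn(search_term="debian", include_categories=["software"]) 34 | s_term, i_cats, e_cats, i_sites, e_sites = parse_search_query(query) 35 | assert i_cats == [Category.SOFTWARE] 36 | print(s_term, i_cats, i_sites, e_sites)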
plugins""" 55 | plugins, is_ok = backend.state() 56 | status_word = "ok" if is_ok else "not ok" 57 | 58 | return StatusOut(status=status_word, plugins=list(plugins)) 59 | 60 | 61 | @app.post( 62 | "/api/v1/search", 63 | response_model=SearchOut, 64 | responses={422: {"model": SearchError}}, 65 | ) 66 | @limiter.limit(settings.rate_limit) 67 | async def search( 68 | request: Request, response: Response, sq: SearchIn 69 | ): # pylint: disable=unused-argument 70 | """Searches the relevant plugins for torrents""" 71 | is_valid, msg = validate(sq) 72 | if not is_valid: 73 | response.status_code = 422 74 | raise HTTPException(status_code=422, detail=msg) 75 | 76 | s_term, i_cats, e_cats, i_sites, e_sites = parse_search_query(sq) 77 | 78 | start_time = datetime.now() 79 | try: 80 | listings, cache_hit = await backend.search( 81 | search_term=s_term, 82 | include_categories=i_cats, 83 | exclude_categories=e_cats, 84 | include_sites=i_sites, 85 | exclude_sites=e_sites, 86 | ) 87 | except NoPluginsError as exc: 88 | raise HTTPException(status_code=500, detail="No searchable plugins.") from exc 89 | except InvalidSearchError as exc: 90 | response.status_code = 400 91 | raise HTTPException(status_code=422, detail="Invalid search.") from exc 92 | elapsed = datetime.now() - start_time 93 | 94 | return SearchOut( 95 | status="ok", data=listings, cache_hit=cache_hit, elapsed=elapsed.total_seconds() 96 | ) 97 | 98 | 99 | def validate(sq: SearchIn) -> bool: 100 | indexed_sites = list(backend.state()[0]) 101 | for site in chain(sq.include_sites, sq.exclude_sites): 102 | if site not in indexed_sites: 103 | or_string = f'{", ".join(indexed_sites[:-1])} or {indexed_sites[-1]}' 104 | return ( 105 | False, 106 | f'For now, "{site}" is not indexed. Perhaps you meant {or_string}', 107 | ) 108 | return True, "" 109 | 110 | 111 | if __name__ == "__main__": 112 | import uvicorn 113 | 114 | uvicorn.run(app, reload=True) 115 | -------------------------------------------------------------------------------- /app/schemas.py: -------------------------------------------------------------------------------- 1 | """Contains the request and response models for the API""" 2 | 3 | from typing import List 4 | 5 | from fastapi import HTTPException 6 | 7 | from pydantic import BaseModel, field_validator, model_validator, computed_field 8 | 9 | from cleanbay.torrent import Category, Torrent 10 | 11 | CATEGORY_MAP = { 12 | "all": Category.ALL, 13 | "general": Category.GENERAL, 14 | "cinema": Category.CINEMA, 15 | "tv": Category.TV, 16 | "software": Category.SOFTWARE, 17 | "books": Category.BOOKS, 18 | } 19 | 20 | 21 | class SearchIn(BaseModel): 22 | """Used to deserialize the JSON received in the request body 23 | 24 | Attributes: 25 | search_term (str): The string to search for 26 | include_categories (list): Categories in which to search 27 | exclude_categories (list): Categories in which to not search 28 | include_sites (list): Plugins/services to search 29 | exclude_sites (list): Plugins/services to not search 30 | 31 | """ 32 | 33 | search_term: str 34 | include_categories: List[str] = [] 35 | exclude_categories: List[str] = [] 36 | include_sites: List[str] = [] 37 | exclude_sites: List[str] = [] 38 | 39 | @field_validator("search_term") 40 | @classmethod 41 | def validate_search_term_not_empty(cls, search_term: str) -> str: 42 | if search_term.strip() == "": 43 | raise HTTPException(status_code=422, detail="No search term given.") 44 | return search_term 45 | 46 | @field_validator("include_categories", "exclude_categories") 47 
120 | -------------------------------------------------------------------------------- /app/schemas.py: -------------------------------------------------------------------------------- 1 | """Contains the request and response models for the API""" 2 | 3 | from typing import List 4 | 5 | from fastapi import HTTPException 6 | 7 | from pydantic import BaseModel, field_validator, model_validator, computed_field 8 | 9 | from cleanbay.torrent import Category, Torrent 10 | 11 | CATEGORY_MAP = { 12 | "all": Category.ALL, 13 | "general": Category.GENERAL, 14 | "cinema": Category.CINEMA, 15 | "tv": Category.TV, 16 | "software": Category.SOFTWARE, 17 | "books": Category.BOOKS, 18 | } 19 | 20 | 21 | class SearchIn(BaseModel): 22 | """Used to deserialize the JSON received in the request body 23 | 24 | Attributes: 25 | search_term (str): The string to search for 26 | include_categories (list): Categories in which to search 27 | exclude_categories (list): Categories in which to not search 28 | include_sites (list): Plugins/services to search 29 | exclude_sites (list): Plugins/services to not search 30 | 31 | """ 32 | 33 | search_term: str 34 | include_categories: List[str] = [] 35 | exclude_categories: List[str] = [] 36 | include_sites: List[str] = [] 37 | exclude_sites: List[str] = [] 38 | 39 | @field_validator("search_term") 40 | @classmethod 41 | def validate_search_term_not_empty(cls, search_term: str) -> str: 42 | if search_term.strip() == "": 43 | raise HTTPException(status_code=422, detail="No search term given.") 44 | return search_term 45 | 46 | @field_validator("include_categories", "exclude_categories") 47 | @classmethod 48 | def validate_category_names(cls, category_list: list) -> list: 49 | invalid_categories = list( 50 | filter(lambda cat: cat not in CATEGORY_MAP, category_list) 51 | ) 52 | if invalid_categories: 53 | categories = list(CATEGORY_MAP.keys()) 54 | or_string = f"{', '.join(categories[:-1])} or {categories[-1]}" 55 | raise HTTPException( 56 | status_code=422, 57 | # pylint: disable=line-too-long 58 | detail=f"No such categories: {', '.join(invalid_categories)}. Perhaps you meant {or_string}", 59 | ) 60 | return category_list 61 | 62 | @model_validator(mode="after") 63 | def validate_filter_variant_exclusivity(self) -> "SearchIn": 64 | if self.include_categories and self.exclude_categories: 65 | raise HTTPException( 66 | status_code=422, 67 | detail="Cannot use include and exclude categories together.", 68 | ) 69 | if self.include_sites and self.exclude_sites: 70 | raise HTTPException( 71 | status_code=422, detail="Cannot use include and exclude sites together." 72 | ) 73 | return self 74 | 75 | 76 | class SearchOut(BaseModel): 77 | status: str = "ok" 78 | cache_hit: bool 79 | elapsed: float 80 | data: List[Torrent] 81 | 82 | @computed_field 83 | @property 84 | def length(self) -> int: 85 | return len(self.data) 86 | 87 | 88 | class SearchError(BaseModel): 89 | status: str 90 | msg: str 91 | 92 | 93 | class StatusOut(BaseModel): 94 | status: str 95 | plugins: List[str] 96 | -------------------------------------------------------------------------------- /app/settings.py: -------------------------------------------------------------------------------- 1 | """Contains the app settings""" 2 | 3 | from pydantic_settings import BaseSettings 4 | 5 | 6 | class Settings(BaseSettings): 7 | """Loads and represents app config from environment variables. 8 | 9 | Attributes: 10 | plugins_directory (str): The directory where plugin files are stored 11 | cache_size (int): Size for the cache 12 | cache_timeout (int): How long the cache maintains an entry (in seconds) 13 | session_timeout (int): Timeout for requests to external services (in seconds) 14 | rate_limit (str): Rate limit descriptor 15 | allowed_origin (str): Origin from which requests are allowed 16 | 17 | """ 18 | 19 | plugins_directory: str = "./cleanbay/plugins" 20 | cache_size: int = 128 21 | cache_timeout: int = 300 22 | session_timeout: int = 8 23 | rate_limit: str = "100/minute" 24 | allowed_origin: str = "*" 25 | 26 | 27 | settings = Settings() 28 | -------------------------------------------------------------------------------- /cleanbay/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/cleanbay/__init__.py -------------------------------------------------------------------------------- /cleanbay/abstract_plugin.py: -------------------------------------------------------------------------------- 1 | """The module contains the abstract interface for plugins""" 2 | from abc import ABC, abstractmethod 3 | import asyncio # pylint: disable=unused-import 4 | import aiohttp 5 | 6 | 7 | class AbstractPlugin(ABC): 8 | """All plugins must be derived from this abstract class.""" 9 | 10 | @abstractmethod 11 | def verify_status(self) -> bool: 12 | """Verifies the status of the external service used by the plugin. 13 | 14 | Returns `True` only if said service is online and usable; 15 | `False` otherwise. 16 | 17 | """ 18 | pass 19 | 20 | @abstractmethod 21 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 22 | """Searches the external service. 23 | 24 | Args: 25 | session (aiohttp.ClientSession): a session object that the plugin can use 26 | to access the web. 27 | search_param (str): the string to search for. 28 | 29 | """ 30 | pass 31 | 32 | @abstractmethod 33 | def info(self) -> dict: 34 | """Gives metadata about the plugin 35 | 36 | Must include 'name' and 'category' keys. 37 | 38 | """ 39 | pass
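40 | 41 | 42 | # Illustrative sketch only (not loaded by the app): a minimal concrete plugin. 43 | # Real plugins live in cleanbay/plugins, must expose a class named `CBPlugin`, 44 | # and return a list of `Torrent` objects; the endpoint and JSON shape below are 45 | # hypothetical. 46 | from .torrent import Torrent, Category # pylint: disable=wrong-import-position 47 | 48 | 49 | class ExamplePlugin(AbstractPlugin): 50 | def verify_status(self) -> bool: 51 | return True 52 | 53 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 54 | resp = await session.get(f"https://example.invalid/api?q={search_param}") 55 | return [ 56 | Torrent(r["name"], r["magnet"], r["seeders"], r["leechers"], r["size"], "example", r["date"]) 57 | for r in await resp.json() 58 | ] 59 | 60 | def info(self) -> dict: 61 | return {"name": "example", "category": Category.GENERAL}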
62 | -------------------------------------------------------------------------------- /cleanbay/backend.py: -------------------------------------------------------------------------------- 1 | """Manages the plugins and the cache.""" 2 | 3 | import asyncio 4 | 5 | from aiohttp import ClientSession, ClientTimeout, TCPConnector 6 | 7 | from typing import Tuple 8 | 9 | from .cache_manager import AbstractCacheManager 10 | from .plugins_manager import PluginsManager 11 | 12 | 13 | class InvalidSearchError(Exception): 14 | """Indicates that the search parameters were invalid.""" 15 | 16 | pass 17 | 18 | 19 | class Backend: 20 | """This class handles all behind-the-scenes logic. 21 | 22 | Handles searching each of the loaded plugins asynchronously and caching 23 | the results. 24 | 25 | Attributes: 26 | timeout (int): Timeout for requests to external services (in seconds). 27 | cache (AbstractCacheManager): The cache implementation in use. 28 | plugins_manager (PluginsManager): Loads, stores, and filters the plugins. 29 | 30 | """ 31 | 32 | def __init__( 33 | self, 34 | request_timeout: int, 35 | cache_manager: AbstractCacheManager, 36 | plugins_manager: PluginsManager, 37 | ): 38 | """Initializes the backend object. 39 | 40 | Arguments: 41 | request_timeout (int): Timeout for requests to external services (in seconds) 42 | cache_manager (AbstractCacheManager): A concrete impl for a cache 43 | plugins_manager (PluginsManager): A concrete impl for managing plugins. 44 | 45 | """ 46 | self.timeout = request_timeout 47 | self.cache = cache_manager 48 | self.plugins_manager = plugins_manager 49 | 50 | def state(self): 51 | plugins = self.plugins_manager.plugins.keys() 52 | is_ok = bool(plugins) 53 | 54 | return (plugins, is_ok)
55 | 56 | async def search( 57 | self, 58 | search_term: str, 59 | include_categories: list, 60 | exclude_categories: list, 61 | include_sites: list, 62 | exclude_sites: list, 63 | ) -> Tuple: 64 | """Searches the relevant plugins for torrents. 65 | 66 | Looks in the cache first. Ideally finds the listings there. 67 | 68 | In case of a miss, invokes the search method of each plugin (which might 69 | be time consuming). 70 | 71 | Note: 72 | 1. This will cause the cache to update in case of a miss. Which, if it is 73 | full, might cause even more delay. 74 | 2. For each filter, use either the include or the exclude variant. Using 75 | both raises `InvalidSearchError`. 76 | 77 | Args: 78 | search_term (str): The string to search for. 79 | include_categories (list): Categories of plugins to search 80 | exclude_categories (list): Categories of plugins to not search 81 | include_sites (list): Names of services to search 82 | exclude_sites (list): Names of services to not search 83 | 84 | Returns: 85 | A tuple in the form ([], bool). The bool is True in case of a cache hit, 86 | False otherwise. 87 | 88 | Raises: 89 | InvalidSearchError: if both the include and exclude variants of a filter 90 | are used together. 91 | NoPluginsError: if no usable plugins are loaded. 92 | 93 | """ 94 | # should not be using include and exclude together 95 | if include_categories and exclude_categories or include_sites and exclude_sites: 96 | raise InvalidSearchError() 97 | 98 | search_term = search_term.lower() 99 | 100 | plugins = self.plugins_manager.filter_plugins( 101 | include_categories, exclude_categories, include_sites, exclude_sites 102 | ) 103 | 104 | results, cache_hit = self.try_cache(search_term, plugins) 105 | if not cache_hit: 106 | results = await self.update_cache(search_term, plugins) 107 | 108 | return (results, cache_hit) 109 | 110 | def try_cache(self, search_param: str, plugins: list) -> Tuple: 111 | """Returns the listings from the cache. 112 | 113 | Args: 114 | search_param (str): The string to search for. 115 | plugins (list): Plugin objects implementing the `search()` method. 116 | 117 | Returns: 118 | A tuple containing a list of torrents and a bool denoting if there was a 119 | cache hit or not. 120 | 121 | """ 122 | cache_hit = self.cache.read(search_param, plugins) 123 | 124 | if not cache_hit: 125 | return [], False 126 | 127 | return cache_hit, True 128 | 129 | async def update_cache(self, search_param: str, plugins: list) -> list: 130 | """Updates the cache. 131 | 132 | Searches each plugin in the category and puts its results into the cache. 133 | 134 | Note: 135 | If the cache has grown to its configured maximum size, the least 136 | frequently used entry is evicted to make room for the new one. 137 | 138 | Args: 139 | search_param (str): the string to search for. 140 | plugins (list): Plugin objects implementing the `search()` method. 141 | 142 | Returns: 143 | List of torrents matching the search query 144 | 145 | """ 146 | results = await self.search_plugins(search_param, plugins) 147 | 148 | if not results: 149 | return [] 150 | 151 | self.cache.store(search_param, plugins, results) 152 | 153 | return results 154 | 155 | async def search_plugins(self, search_param: str, plugins: list) -> list: 156 | """Searches the given plugins concurrently. 157 | 158 | This is an asynchronous function that fires off the plugins, which, in 159 | turn, send off HTTP requests, parse the results, and return their 160 | respective listings. 161 | 162 | Args: 163 | search_param (str): the string to search for. 164 | plugins (list): Plugin objects implementing the `search()` method. 165 | 166 | Returns: 167 | A list of compiled results from the specified plugins. 168 | 169 | """ 170 | results = [] 171 | 172 | session_timeout = ClientTimeout(total=self.timeout) 173 | async with ClientSession( 174 | connector=TCPConnector(ssl=False), timeout=session_timeout 175 | ) as session: 176 | tasks = self.create_search_tasks(session, search_param, plugins) 177 | results = await asyncio.gather(*tasks, return_exceptions=True) 178 | 179 | results = self.exclude_errors(results) 180 | 181 | return self.flatten(results) 182 | 183 | def create_search_tasks( 184 | self, session: ClientSession, search_param: str, plugins: list 185 | ) -> list: 186 | """Creates async tasks for each plugin""" 187 | tasks = [] 188 | for plugin in plugins: 189 | search_future = plugin.search(session, search_param) 190 | task = asyncio.create_task(search_future) 191 | tasks.append(task) 192 | 193 | return tasks 194 | 195 | def exclude_errors(self, listings: list): 196 | return [listing for listing in listings if isinstance(listing, list)] 197 | 198 | def flatten(self, t: list) -> list: 199 | return [item for sublist in t for item in sublist]
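200 | 201 | # Illustrative wiring (mirrors app/main.py; the timeout and cache sizes here 202 | # are example values): 203 | # 204 | # from cleanbay.cache_manager import LFUCache 205 | # from cleanbay.plugins_manager import PluginsManager 206 | # 207 | # backend = Backend(8, LFUCache(128, 300), PluginsManager("./cleanbay/plugins")) 208 | # listings, hit = asyncio.run(backend.search("debian", [], [], [], []))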
209 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring 2 | from .abstract_cache_manager import AbstractCacheManager 3 | from .lfu_cache import LFUCache 4 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/abstract_cache_manager.py: -------------------------------------------------------------------------------- 1 | """Contains the cache manager interface/abstract class""" 2 | from abc import ABC, abstractmethod 3 | 4 | 5 | class AbstractCacheManager(ABC): 6 | """All cache managers must be derived from this class""" 7 | 8 | @abstractmethod 9 | def store(self, search_term: str, plugins: list, listings: list): 10 | """Stores a search result into the cache. 11 | 12 | Arguments: 13 | search_term (str): The string that was searched. 14 | plugins (list): List of Plugin objects used in the search. 15 | listings (list): List of Torrents returned from the search. 16 | 17 | """ 18 | pass 19 | 20 | @abstractmethod 21 | def read(self, search_term: str, plugins: list) -> list: 22 | """Reads an item from the cache. 23 | 24 | Arguments: 25 | search_term (str): The string that was searched. 26 | plugins (list): List of Plugin objects used in the search. 27 | 28 | Returns: 29 | A list of Torrents. 30 | 31 | """ 32 | pass 33 | -------------------------------------------------------------------------------- /cleanbay/cache_manager/lfu_cache.py: --------------------------------------------------------------------------------
1 | """Contains the implementation for LFU-based cache manager""" 2 | from datetime import datetime, timedelta 3 | 4 | from typing import Tuple 5 | 6 | from cleanbay.cache_manager.abstract_cache_manager import AbstractCacheManager 7 | 8 | 9 | class LFUCache(AbstractCacheManager): 10 | """Manages an LFU cache with a timeout. 11 | 12 | Attributes: 13 | lines (dict): Cache items hashed by the tuple of the search term and the 14 | names of the plugins utilized in the search. 15 | max_size (int): Maximum number of entries in the cache. 16 | timeout (timedelta): Time in seconds after which a cache entry is 17 | invalidated. 18 | 19 | """ 20 | 21 | def __init__(self, max_size: int, timeout: int): 22 | """Initializes the cache. 23 | 24 | Arguments: 25 | max_size (int): Maximum number of entries in the cache. 26 | timeout (int): Time in seconds after which a cache entry is invalidated. 27 | 28 | """ 29 | self.lines = {} 30 | self.max_size = max_size 31 | self.timeout = timedelta(seconds=timeout) 32 | 33 | def store(self, search_term: str, plugins: list, listings: list): 34 | """Stores a search result into the cache. 35 | 36 | If the cache is at its maximum size, the least frequently used item is 37 | deleted before storing the incoming item. 38 | 39 | Arguments: 40 | search_term (str): The string that was searched. 41 | plugins (list): List of Plugin objects used in the search. 42 | listings (list): List of Torrents returned from the search. 43 | 44 | """ 45 | if len(self.lines) == self.max_size: 46 | lfu = self.least_frequently_used() 47 | del self.lines[lfu] 48 | 49 | key = self.make_key(search_term, plugins) 50 | self.lines[key] = { 51 | "listings": listings, 52 | "hit_count": 1, 53 | "store_time": datetime.now(), 54 | } 55 | 56 | def read(self, search_term: str, plugins: list) -> list: 57 | """Reads an item from the cache. 58 | 59 | If the item is in the cache, its 'hit_count' is increased by 1. In case of 60 | a cache miss, an empty list is returned. 61 | 62 | Arguments: 63 | search_term (str): The string that was searched. 64 | plugins (list): List of Plugin objects used in the search. 65 | 66 | Returns: 67 | A list of Torrents. 68 | 69 | """ 70 | key = self.make_key(search_term, plugins) 71 | 72 | if key not in self.lines: 73 | return [] 74 | if not self.is_valid(self.lines[key]): 75 | return [] 76 | 77 | self.lines[key]["hit_count"] += 1 78 | return self.lines[key]["listings"] 79 | 80 | def is_valid(self, line: dict) -> bool: 81 | """Checks if the cache item has timed out. 82 | 83 | Arguments: 84 | line (dict): The cache line to check. 85 | 86 | Returns: 87 | True if the item hasn't timed out. False otherwise. 88 | """ 89 | current_time = datetime.now() 90 | store_time = line["store_time"] 91 | 92 | return current_time - store_time < self.timeout 93 | 94 | def make_key(self, search_term: str, plugins: list) -> Tuple: 95 | names = [plugin.info()["name"] for plugin in plugins] 96 | return (search_term, frozenset(names)) 97 | 98 | def least_frequently_used(self): 99 | return min(self.lines.items(), key=lambda x: x[1]["hit_count"])[0]
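100 | 101 | 102 | # Illustrative usage only; the stub plugin below is hypothetical and exists 103 | # just to supply the `info()["name"]` that `make_key` reads. 104 | if __name__ == "__main__": 105 | class StubPlugin: # pylint: disable=missing-class-docstring 106 | def info(self): 107 | return {"name": "stub"} 108 | 109 | cache = LFUCache(max_size=2, timeout=300) 110 | cache.store("debian", [StubPlugin()], ["listing"]) 111 | assert cache.read("debian", [StubPlugin()]) == ["listing"]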
88 | """ 89 | current_time = datetime.now() 90 | store_time = line["store_time"] 91 | 92 | return current_time - store_time < self.timeout 93 | 94 | def make_key(self, search_term: str, plugins: list) -> Tuple: 95 | names = [plugin.info()["name"] for plugin in plugins] 96 | return (search_term, frozenset(names)) 97 | 98 | def least_frequently_used(self): 99 | return min(self.lines.items(), key=lambda x: x[1]["hit_count"])[0] 100 | -------------------------------------------------------------------------------- /cleanbay/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gr3atWh173/cleanbay/f61a72e5d7ab7190db1c66241d425660d34dbc9e/cleanbay/plugins/__init__.py -------------------------------------------------------------------------------- /cleanbay/plugins/eztv.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the eztv plugin""" 2 | 3 | from bs4 import BeautifulSoup, SoupStrainer 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self): 12 | return sync_get(self.info()["domain"], timeout=10).status_code == 200 13 | 14 | async def search(self, session, search_param): 15 | info = self.info() 16 | url = info["search_url"] + search_param 17 | resp = await session.get(url) 18 | 19 | strainer = SoupStrainer("table") 20 | resp = BeautifulSoup(await resp.text(), features="lxml", parse_only=strainer) 21 | 22 | table = resp.findChildren("table")[4] 23 | if len(table) == 0: 24 | return [] 25 | 26 | torrents = [] 27 | for row in table.findChildren("tr")[2:]: 28 | seeders = row.findChildren("td")[5].text 29 | if not seeders.isnumeric(): 30 | seeders = 0 31 | else: 32 | seeders = int(seeders) 33 | 34 | try: 35 | magnet = row.findChildren("td")[2].findChildren("a")[0]["href"] 36 | except IndexError: 37 | continue 38 | 39 | torrents.append( 40 | Torrent( 41 | row.findChildren("td")[1].text.strip(), 42 | magnet, 43 | seeders, 44 | -1, 45 | row.findChildren("td")[3].text, 46 | "eztv", 47 | row.findChildren("td")[4].text, 48 | ) 49 | ) 50 | return torrents 51 | 52 | def info(self): 53 | return { 54 | "name": "eztv", 55 | "category": Category.TV, 56 | "domain": "https://eztv.re", 57 | "search_url": "https://eztv.re/search/", 58 | } 59 | -------------------------------------------------------------------------------- /cleanbay/plugins/leetx.py.disabled: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup, SoupStrainer 2 | import asyncio # pylint: disable=unused-import 3 | 4 | from ..abstract_plugin import AbstractPlugin 5 | from ..torrent import Torrent, Category 6 | 7 | 8 | class CBPlugin(AbstractPlugin): 9 | def verify_status(self): 10 | return True 11 | 12 | async def search(self, session, search_param): 13 | info = self.info() 14 | domain, useragent = info['domain'], info['user-agent'] 15 | url = f'{domain}/search/{search_param}/1/' 16 | resp = await session.get(url, headers={'User-Agent': useragent}) 17 | 18 | strainer = SoupStrainer('table') 19 | resp = BeautifulSoup( 20 | await resp.text(), 21 | features='lxml', 22 | parse_only=strainer) 23 | 24 | table = resp.findChildren('table') 25 | if len(table) == 0: 26 | return [] 27 | 28 | torrents = [] 29 | for row in table[0].findChildren('tr')[1:]: 30 | 
59 | -------------------------------------------------------------------------------- /cleanbay/plugins/leetx.py.disabled: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup, SoupStrainer 2 | import asyncio # pylint: disable=unused-import 3 | 4 | from ..abstract_plugin import AbstractPlugin 5 | from ..torrent import Torrent, Category 6 | 7 | 8 | class CBPlugin(AbstractPlugin): 9 | def verify_status(self): 10 | return True 11 | 12 | async def search(self, session, search_param): 13 | info = self.info() 14 | domain, useragent = info['domain'], info['user-agent'] 15 | url = f'{domain}/search/{search_param}/1/' 16 | resp = await session.get(url, headers={'User-Agent': useragent}) 17 | 18 | strainer = SoupStrainer('table') 19 | resp = BeautifulSoup( 20 | await resp.text(), 21 | features='lxml', 22 | parse_only=strainer) 23 | 24 | table = resp.findChildren('table') 25 | if len(table) == 0: 26 | return [] 27 | 28 | torrents = [] 29 | for row in table[0].findChildren('tr')[1:]: 30 | torrents.append(Torrent( 31 | row.findChildren('td')[0].findChildren('a')[1].text, 32 | # TODO(gr3atwh173): create a function to get the magnet from this link 33 | row.findChildren('td')[0].findChildren('a')[1]['href'], 34 | int(row.findChildren('td')[1].text), 35 | int(row.findChildren('td')[2].text), 36 | row.findChildren('td')[4].text.split('B')[0] + 'B', 37 | row.findChildren('td')[5].text, 38 | row.findChildren('td')[3].text 39 | )) 40 | return torrents 41 | 42 | def info(self): 43 | return { 44 | 'name': 'leetx', 45 | 'category': Category.GENERAL, 46 | 'domain': 'https://1337x.to', 47 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' 48 | } 49 | -------------------------------------------------------------------------------- /cleanbay/plugins/libgen.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the libgen plugin""" 2 | 3 | from urllib.parse import quote as uri_quote 4 | import aiohttp 5 | 6 | import requests 7 | 8 | from bs4 import BeautifulSoup, SoupStrainer 9 | 10 | from ..torrent import Torrent, Category 11 | from ..abstract_plugin import AbstractPlugin 12 | 13 | 14 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 15 | def verify_status(self) -> bool: 16 | domain = self.info()["domain"] 17 | return requests.get(domain, timeout=10).status_code == 200 18 | 19 | async def search(self, session: aiohttp.ClientSession, search_param: str) -> list: 20 | domain = self.info()["domain"] 21 | search_param = uri_quote(search_param) 22 | res = await session.get(f"{domain}/search.php?req={search_param}") 23 | 24 | strainer = SoupStrainer("table") 25 | soup = BeautifulSoup(await res.text(), features="lxml", parse_only=strainer) 26 | 27 | table = soup.findChildren("table")[2] 28 | 29 | torrents = [] 30 | for row in table.findChildren("tr")[1:]: 31 | cols = row.findChildren("td") 32 | 33 | author = cols[1].text 34 | title = cols[2].text 35 | publisher = cols[3].text 36 | year = cols[4].text 37 | pages = cols[5].text 38 | language = cols[6].text 39 | size = cols[7].text 40 | download = cols[9].find("a")["href"] 41 | 42 | # construct the name 43 | name = [] 44 | if author: 45 | name.append(f"[{author}]") 46 | if title: 47 | name.append(title) 48 | name = " ".join(name) 49 | 50 | # construct additional info 51 | info = [] 52 | if publisher: 53 | info.append(publisher) 54 | if language: 55 | info.append(language) 56 | if year: 57 | info.append(year) 58 | if pages: 59 | info.append(f"{pages}p") 60 | info = ", ".join(info) 61 | 62 | if info: 63 | name += f" ({info})" 64 | 65 | torrents.append( 66 | Torrent(name, download, 1, -1, size.upper(), "libgen", year) 67 | ) 68 | 69 | return torrents 70 | 71 | def info(self) -> dict: 72 | return { 73 | "name": "libgen", 74 | "category": Category.BOOKS, 75 | "domain": "https://libgen.is", 76 | } 77 | -------------------------------------------------------------------------------- /cleanbay/plugins/linuxtracker.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the linuxtracker plugin""" 2 | 3 | from bs4 import BeautifulSoup 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def info(self): 12 | return { 13 | "name": "linuxtracker", 14 | "category": 
Category.SOFTWARE, 15 | "domain": "https://linuxtracker.org", 16 | } 17 | 18 | def verify_status(self): 19 | domain = self.info()["domain"] 20 | return sync_get(domain, timeout=10).status_code == 200 21 | 22 | async def search(self, session, search_param): 23 | domain = self.info()["domain"] 24 | search_url = "{}/index.php?page=torrents&search={}&category=0&active=1" 25 | 26 | resp = await session.get(search_url.format(domain, search_param)) 27 | soup = BeautifulSoup(await resp.text(), features="lxml") 28 | 29 | table = soup.find_all("table", {"class": "lista"})[4] 30 | if len(table) == 0: 31 | return [] 32 | 33 | torrents = [] 34 | for row in table.findChildren("tr")[1:]: 35 | try: 36 | name = row.find_all("td")[1].find_all()[2].text 37 | magnet = row.find_all("td")[1].find_all()[26].find_all("a")[1]["href"] 38 | date, size, seeders, leechers = self.extract_info( 39 | row.find_all("td")[1].find_all()[7].text.split("\n")[2:6] 40 | ) 41 | 42 | torrents.append( 43 | Torrent( 44 | name, 45 | magnet, 46 | int(seeders), 47 | int(leechers), 48 | size, 49 | "linuxtracker", 50 | date, 51 | ) 52 | ) 53 | except IndexError: 54 | pass 55 | 56 | return torrents 57 | 58 | def extract_info(self, raw_list): 59 | date = raw_list[0].split(":")[1].strip() 60 | size = raw_list[1].split(":")[1].strip() 61 | seeders = raw_list[2].strip().split(" ")[1] 62 | leechers = raw_list[3].strip().split(" ")[1] 63 | 64 | return (date, size, seeders, leechers) 65 | -------------------------------------------------------------------------------- /cleanbay/plugins/nyaa.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the nyaa plugin""" 2 | 3 | from bs4 import BeautifulSoup, SoupStrainer 4 | from requests import get as sync_get 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self): 12 | return sync_get(self.info()["domain"], timeout=10).status_code == 200 13 | 14 | async def search(self, session, search_param): 15 | info = self.info() 16 | url = info["search_url"] + search_param 17 | resp = await session.get(url) 18 | 19 | strainer = SoupStrainer("table") 20 | resp = BeautifulSoup(await resp.text(), features="lxml", parse_only=strainer) 21 | 22 | table = resp.findChildren("table")[0] 23 | 24 | if len(table) == 0: 25 | return [] 26 | 27 | torrents = [] 28 | for row in table.findChildren("tr")[1:]: 29 | row_children = row.findChildren("td") 30 | 31 | seeders = row_children[5].text 32 | if not seeders.isnumeric(): 33 | seeders = 0 34 | else: 35 | seeders = int(seeders) 36 | 37 | leechers = row_children[6].text 38 | if not leechers.isnumeric(): 39 | leechers = 0 40 | else: 41 | leechers = int(leechers) 42 | 43 | try: 44 | magnet = row_children[2].findChildren("a")[1]["href"] 45 | except IndexError: 46 | continue 47 | 48 | torrents.append( 49 | Torrent( 50 | row_children[1].text.strip(), 51 | magnet, 52 | seeders, 53 | leechers, 54 | row_children[3].text.replace("i", ""), 55 | "nyaa", 56 | row_children[4].text, 57 | ) 58 | ) 59 | return torrents 60 | 61 | def info(self): 62 | return { 63 | "name": "nyaa", 64 | "category": Category.TV, 65 | "domain": "https://nyaa.iss.ink/", 66 | "search_url": "https://nyaa.iss.ink/?f=0&c=0_0&q=", 67 | } 68 | -------------------------------------------------------------------------------- /cleanbay/plugins/piratebay.py: 
-------------------------------------------------------------------------------- 1 | """Contains the impl for the piratebay plugin""" 2 | 3 | from requests import get as get_sync 4 | from urllib.parse import quote as uri_quote 5 | from datetime import datetime, timezone 6 | 7 | import math 8 | 9 | from ..abstract_plugin import AbstractPlugin 10 | from ..torrent import Torrent, Category 11 | 12 | 13 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 14 | def info(self): 15 | return { 16 | "name": "piratebay", 17 | "category": Category.GENERAL, 18 | "domain": "https://apibay.org", 19 | # pylint: disable=line-too-long 20 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", 21 | } 22 | 23 | def verify_status(self): 24 | domain, useragent = self.info()["domain"], self.info()["user-agent"] 25 | return ( 26 | get_sync(domain, headers={"user-agent": useragent}, timeout=10).status_code 27 | != 500 28 | ) 29 | 30 | async def search(self, session, search_param): 31 | domain, useragent = self.info()["domain"], self.info()["user-agent"] 32 | 33 | resp = await session.get( 34 | f"{domain}/q.php?q={search_param}&cat=", headers={"user-agent": useragent} 35 | ) 36 | 37 | if resp.status != 200: 38 | return [] 39 | 40 | torrents = [] 41 | for element in await resp.json(): 42 | torrents.append( 43 | Torrent( 44 | element["name"], 45 | self.make_magnet(element["info_hash"], element["name"]), 46 | int(element["seeders"]), 47 | int(element["leechers"]), 48 | self.format_size(int(element["size"])), 49 | element["username"], 50 | self.format_date(int(element["added"])), 51 | ) 52 | ) 53 | return torrents 54 | 55 | def make_magnet(self, ih, name): 56 | return f"magnet:?xt=urn:btih:{ih}&dn={uri_quote(name)}&tr={self.trackers()}" 57 | 58 | def trackers(self): 59 | trackers = "&tr=".join( 60 | [ 61 | "udp://tracker.coppersurfer.tk:6969/announce", 62 | "udp://tracker.openbittorrent.com:6969/announce", 63 | "udp://9.rarbg.to:2710/announce", 64 | "udp://9.rarbg.me:2780/announce", 65 | "udp://9.rarbg.to:2730/announce", 66 | "udp://tracker.opentrackr.org:1337", 67 | "http://p4p.arenabg.com:1337/announce", 68 | "udp://tracker.torrent.eu.org:451/announce", 69 | "udp://tracker.tiny-vps.com:6969/announce", 70 | "udp://open.stealth.si:80/announce", 71 | ] 72 | ) 73 | return uri_quote(trackers) 74 | 75 | def format_size(self, size_bytes): 76 | if size_bytes == 0: 77 | return "0B" 78 | size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") 79 | i = int(math.floor(math.log(size_bytes, 1024))) 80 | p = math.pow(1024, i) 81 | s = round(size_bytes / p, 2) 82 | # return "%s %s" % (s, size_name[i]) 83 | return f"{s} {size_name[i]}" 84 | 85 | def format_date(self, epoch): 86 | return datetime.fromtimestamp(epoch, timezone.utc).strftime("%Y-%m-%d %H:%M:%S") 87 | -------------------------------------------------------------------------------- /cleanbay/plugins/yts.py: -------------------------------------------------------------------------------- 1 | """Contains the impl for the yts plugin""" 2 | 3 | from requests import get as get_sync 4 | from urllib.parse import quote as uri_quote 5 | 6 | from ..abstract_plugin import AbstractPlugin 7 | from ..torrent import Torrent, Category 8 | 9 | 10 | class CBPlugin(AbstractPlugin): # pylint: disable=missing-class-docstring 11 | def verify_status(self) -> bool: 12 | domain = self.info()["domain"] 13 | return get_sync(domain, timeout=10).status_code == 200 14 | 15 | def info(self) 
--------------------------------------------------------------------------------
1 | """Contains the impl for the yts plugin"""
2 | 
3 | from requests import get as get_sync
4 | from urllib.parse import quote as uri_quote
5 | 
6 | from ..abstract_plugin import AbstractPlugin
7 | from ..torrent import Torrent, Category
8 | 
9 | 
10 | class CBPlugin(AbstractPlugin):  # pylint: disable=missing-class-docstring
11 |     def verify_status(self) -> bool:
12 |         domain = self.info()["domain"]
13 |         return get_sync(domain, timeout=10).status_code == 200
14 | 
15 |     def info(self) -> dict:
16 |         return {
17 |             "name": "yts",
18 |             "category": Category.CINEMA,
19 |             "api_url": "https://yts.mx/api/v2/list_movies.json?query_term=",
20 |             "domain": "https://yts.mx",
21 |         }
22 | 
23 |     async def search(self, session, search_param):
24 |         api_url = self.info()["api_url"]
25 |         resp = await session.get(api_url + uri_quote(search_param))
26 |         resp = await resp.json()
27 | 
28 |         if resp["status"] != "ok" or resp["data"]["movie_count"] == 0:
29 |             return []
30 | 
31 |         torrents = []
32 |         for element in resp["data"]["movies"]:
33 |             max_seed_torrent = max(element["torrents"], key=lambda x: x["seeds"])
34 | 
35 |             title_long = element["title_long"]
36 |             slug = element["slug"]
37 |             quality = max_seed_torrent["quality"]
38 |             type_ = max_seed_torrent["type"]
39 |             info_hash = max_seed_torrent["hash"]
40 |             seeders = max_seed_torrent["seeds"]
41 |             leechers = max_seed_torrent["peers"]
42 |             size = max_seed_torrent["size"]
43 |             date_uploaded = max_seed_torrent["date_uploaded"]
44 | 
45 |             torrents.append(
46 |                 Torrent(
47 |                     f"{title_long} [{quality}] [{type_}]",
48 |                     self.make_magnet(slug, info_hash),
49 |                     int(seeders),
50 |                     int(leechers),
51 |                     size,
52 |                     "yts",
53 |                     date_uploaded,
54 |                 )
55 |             )
56 | 
57 |         return torrents
58 | 
59 |     def make_magnet(self, slug, ih):
60 |         return f"magnet:?xt=urn:btih:{ih}&dn={slug}&tr={self.trackers()}"
61 | 
62 |     def trackers(self):
63 |         trackers = "&tr=".join(
64 |             [
65 |                 "udp://open.demonii.com:1337/announce",
66 |                 "udp://tracker.openbittorrent.com:80",
67 |                 "udp://tracker.coppersurfer.tk:6969",
68 |                 "udp://glotorrents.pw:6969/announce",
69 |                 "udp://tracker.opentrackr.org:1337/announce",
70 |                 "udp://torrent.gresille.org:80/announce",
71 |                 "udp://p4p.arenabg.com:1337",
72 |                 "udp://tracker.leechers-paradise.org:6969",
73 |             ]
74 |         )
75 |         return uri_quote(trackers)
76 | 
--------------------------------------------------------------------------------
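For every movie, the YTS API returns several encodes under a "torrents" key, and search keeps only the best-seeded one via max() with a key function. An illustration with hypothetical data shaped like that array (other fields omitted):

# Hypothetical excerpt of one movie's "torrents" array.
encodes = [
    {"quality": "720p", "type": "web", "seeds": 12},
    {"quality": "1080p", "type": "bluray", "seeds": 87},
    {"quality": "2160p", "type": "web", "seeds": 3},
]

best = max(encodes, key=lambda enc: enc["seeds"])
print(best["quality"], best["type"])  # -> 1080p bluray

--------------------------------------------------------------------------------
/cleanbay/plugins_manager/__init__.py: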
--------------------------------------------------------------------------------
1 | # pylint: disable=missing-module-docstring
2 | from .plugins_manager import PluginsManager, NoPluginsError
3 | 
--------------------------------------------------------------------------------
/cleanbay/plugins_manager/plugins_manager.py:
--------------------------------------------------------------------------------
1 | """Contains PluginsManager and NoPluginsError"""
2 | from importlib import import_module
3 | from os.path import isfile, basename
4 | import glob
5 | 
6 | 
7 | class NoPluginsError(Exception):
8 |     """Indicates that no usable plugins could be loaded."""
9 | 
10 |     pass
11 | 
12 | 
13 | class PluginsManager:
14 |     """Manages the loading and filtering of plugins.
15 | 
16 |     Attributes:
17 |         plugins (dict): Plugin objects hashed by their names
18 | 
19 |     """
20 | 
21 |     def __init__(self, directory: str):
22 |         """Loads the plugins.
23 | 
24 |         Args:
25 |             directory (str): The directory to load the plugins from.
26 | 
27 |         """
28 |         self.plugins = {}
29 | 
30 |         # import all the files ending with `.py` except __init__
31 |         modules = glob.glob(f"{directory}/*.py")
32 |         plugins = [
33 |             import_module(f"cleanbay.plugins.{basename(f)[:-3]}")
34 |             for f in modules
35 |             if isfile(f) and not f.endswith("__init__.py")
36 |         ]
37 | 
38 |         # filter out the unusable plugins
39 |         for plugin in plugins:
40 |             try:
41 |                 plugin = plugin.CBPlugin()
42 |                 info = plugin.info()
43 | 
44 |                 if not plugin.verify_status():
45 |                     continue
46 |                 if ("name" not in info) or ("category" not in info):
47 |                     continue
48 | 
49 |                 self.plugins[info["name"]] = plugin
50 |             except TypeError:
51 |                 # TODO(gr3atwh173): add logging
52 |                 pass
53 |             except:  # pylint: disable=bare-except
54 |                 pass
55 | 
56 |     def filter_plugins(
57 |         self,
58 |         include_categories: list,
59 |         exclude_categories: list,
60 |         include_sites: list,
61 |         exclude_sites: list,
62 |     ) -> list:
63 |         """Filters the plugins based on the passed arguments.
64 | 
65 |         Individual plugins are given more preference than categories. If a plugin
66 |         was excluded in the category filtering phase, it may be added back if it
67 |         was passed in the `include_sites` list.
68 | 
69 |         Args:
70 |             include_categories (list): Categories of plugins to search
71 |             exclude_categories (list): Categories of plugins to not search
72 |             include_sites (list): Names of services to search
73 |             exclude_sites (list): Names of services to not search
74 | 
75 |         Returns:
76 |             A list of filtered plugin objects.
77 | 
78 |         Raises:
79 |             NoPluginsError: if there are no usable plugins
80 | 
81 |         """
82 |         if not self.plugins:
83 |             raise NoPluginsError()
84 | 
85 |         filtered_plugins = set(self.plugins.values())
86 | 
87 |         # categories
88 |         if include_categories:
89 |             filtered_plugins = set()
90 |             for plugin in self.plugins.values():
91 |                 cat = plugin.info()["category"]
92 |                 if cat in include_categories:
93 |                     filtered_plugins.add(plugin)
94 | 
95 |         elif exclude_categories:
96 |             for plugin in self.plugins.values():
97 |                 cat = plugin.info()["category"]
98 |                 if cat in exclude_categories:
99 |                     filtered_plugins.remove(plugin)
100 | 
101 |         # sites
102 |         if include_sites:
103 |             filtered_plugins = set()
104 |             for site, plugin in self.plugins.items():
105 |                 if site in include_sites:
106 |                     filtered_plugins.add(plugin)
107 | 
108 |         elif exclude_sites:
109 |             for site, plugin in self.plugins.items():
110 |                 if site in exclude_sites and plugin in filtered_plugins:
111 |                     filtered_plugins.remove(plugin)
112 | 
113 |         return list(filtered_plugins)
114 | 
--------------------------------------------------------------------------------
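filter_plugins applies the category filters first and the site filters second, and an include list resets the working set outright, which is how a plugin excluded by category can be brought back by naming its site. A sketch of a standalone call; the "cleanbay/plugins" path is an assumption about the working directory, and note that constructing the manager hits the network, since verify_status() runs for every plugin during loading:

from cleanbay.plugins_manager import PluginsManager
from cleanbay.torrent import Category

# Loading imports every plugin module and probes its tracker.
manager = PluginsManager("cleanbay/plugins")

# Software trackers only, minus linuxtracker.
plugins = manager.filter_plugins(
    include_categories=[Category.SOFTWARE],
    exclude_categories=[],
    include_sites=[],
    exclude_sites=["linuxtracker"],
)
print([plugin.info()["name"] for plugin in plugins])

--------------------------------------------------------------------------------
/cleanbay/torrent.py: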
--------------------------------------------------------------------------------
1 | """contains the `Torrent` data class and the `Category` enum"""
2 | from dataclasses import dataclass
3 | from enum import Enum
4 | 
5 | 
6 | class Category(Enum):
7 |     """Represents the category of a plugin.
8 | 
9 |     Variants:
10 |         ALL: Everything under the sun
11 |         GENERAL: Plugins that track everything
12 |         CINEMA: Plugins that track movies
13 |         TV: Plugins that track shows on TV, OTT or anything that's not a movie
14 |         SOFTWARE: Plugins that track software excluding games
15 |         BOOKS: Plugins that index books or audiobooks
16 | 
17 |     """
18 | 
19 |     ALL = 0
20 |     GENERAL = 1
21 |     CINEMA = 2
22 |     TV = 3
23 |     SOFTWARE = 4
24 |     BOOKS = 5
25 | 
26 | 
27 | @dataclass
28 | class Torrent:
29 |     """Represents a torrent listing.
30 | 
31 |     Attributes:
32 |         name (str): Name/title of the torrent
33 |         magnet (str): Magnet URL of the torrent
34 |         seeders (int): Number of seeders. -1 if not listed
35 |         leechers (int): Number of leechers. -1 if not listed
36 |         size (str): Size in the format "<quantity> <unit>"
37 |         uploader (str): Username of the uploader
38 |         uploaded_at (str): Upload date or time since upload
39 | 
40 |     """
41 | 
42 |     name: str
43 |     magnet: str
44 |     seeders: int
45 |     leechers: int
46 |     size: str
47 |     uploader: str
48 |     uploaded_at: str
49 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "cleanbay"
3 | version = "0.2.0.beta"
4 | description = "A metasearch engine for torrents"
5 | authors = ["Gr3atWh173"]
6 | license = "MIT"
7 | 
8 | [tool.poetry.dependencies]
9 | python = "^3.11.4"
10 | beautifulsoup4 = "^4.12.3"
11 | lxml = "^5.1.0"
12 | requests = "^2.31.0"
13 | aiohttp = "^3.9.4"
14 | uvicorn = {extras = ["standard"], version = "^0.27.0.post1"}
15 | fastapi = "^0.109.2"
16 | gunicorn = "^23.0.0"
17 | python-dotenv = "^1.0.1"
18 | slowapi = "^0.1.9"
19 | pytest = "^8.0.0"
20 | certifi = "^2024.2.2"
21 | httpx = "^0.26.0"
22 | pydantic-settings = "^2.2.0"
23 | 
24 | [tool.poetry.dev-dependencies]
25 | pylint = "^2.11.1"
26 | 
27 | [tool.poetry.group.dev.dependencies]
28 | pre-commit = "^3.6.2"
29 | 
30 | [build-system]
31 | requires = ["poetry-core>=1.0.0"]
32 | build-backend = "poetry.core.masonry.api"
33 | 
--------------------------------------------------------------------------------
/test_app.py:
--------------------------------------------------------------------------------
1 | """Integration tests for the app"""
2 | 
3 | import re
4 | from os import getenv
5 | from time import sleep
6 | 
7 | from fastapi.testclient import TestClient
8 | 
9 | from dotenv import load_dotenv
10 | 
11 | from app.main import app
12 | 
13 | 
14 | load_dotenv()
15 | cache_timeout = int(getenv("CACHE_TIMEOUT", "300"))
16 | 
17 | client = TestClient(app)
18 | 
19 | 
20 | def test_status():
21 |     response = client.get("/api/v1/status")
22 |     assert response.status_code == 200
23 |     assert response.json()["status"] == "ok"
24 | 
25 | 
26 | def test_empty_search():
27 |     response = client.post(
28 |         "/api/v1/search",
29 |         json={
30 |             "search_term": "",
31 |         },
32 |     )
33 | 
34 |     assert response.status_code == 422
35 | 
36 | 
37 | def test_simple_search():
38 |     response = client.post(
39 |         "/api/v1/search",
40 |         json={
41 |             "search_term": "star wars",
42 |         },
43 |     )
44 | 
45 |     assert response.status_code == 200
46 |     assert response.json()["length"] > 0
47 | 
48 |     for listing in response.json()["data"]:
49 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
50 |             listing["magnet"]
51 |         )
52 | 
53 | 
54 | def test_include_categories():
55 |     response = client.post(
56 |         "/api/v1/search",
57 |         json={
58 |             "search_term": "kali",
59 |             "include_categories": ["software"],
60 |         },
61 |     )
62 | 
63 |     assert response.status_code == 200
64 |     assert response.json()["length"] > 0
65 | 
66 |     for listing in response.json()["data"]:
67 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
68 |             listing["magnet"]
69 |         )
70 |         assert listing["uploader"] == "linuxtracker"
71 | 
72 | 
73 | def test_exclude_categories():
74 |     response = client.post(
75 |         "/api/v1/search",
76 |         json={
77 |             "search_term": "alpine",
78 |             "exclude_categories": ["software"],
79 |         },
80 |     )
81 | 
82 |     assert response.status_code == 200
83 |     assert response.json()["length"] > 0
84 | 
85 |     for listing in response.json()["data"]:
86 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
87 |             listing["magnet"]
88 |         )
89 |         assert listing["uploader"] != "linuxtracker"
90 | 
91 | 
92 | def test_include_exclude_categories():
93 |     response = client.post(
94 |         "/api/v1/search",
95 |         json={
96 |             "search_term": "alpine",
97 |             "include_categories": ["software"],
98 |             "exclude_categories": ["cinema"],
99 |         },
100 |     )
101 | 
102 |     assert response.status_code == 422
103 | 
104 | 
105 | def test_include_sites():
106 |     response = client.post(
107 |         "/api/v1/search",
108 |         json={
109 |             "search_term": "kali",
110 |             "include_sites": ["linuxtracker"],
111 |         },
112 |     )
113 | 
114 |     assert response.status_code == 200
115 |     assert response.json()["length"] > 0
116 | 
117 |     for listing in response.json()["data"]:
118 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
119 |             listing["magnet"]
120 |         )
121 |         assert listing["uploader"] == "linuxtracker"
122 | 
123 | 
124 | def test_exclude_sites():
125 |     response = client.post(
126 |         "/api/v1/search",
127 |         json={
128 |             "search_term": "alpine",
129 |             "exclude_sites": ["linuxtracker"],
130 |         },
131 |     )
132 | 
133 |     assert response.status_code == 200
134 |     assert response.json()["length"] > 0
135 | 
136 |     for listing in response.json()["data"]:
137 |         assert listing["magnet"].startswith("magnet:?xt=urn:btih") or is_valid_url(
138 |             listing["magnet"]
139 |         )
140 |         assert listing["uploader"] != "linuxtracker"
141 | 
142 | 
143 | def test_include_exclude_sites():
144 |     response = client.post(
145 |         "/api/v1/search",
146 |         json={
147 |             "search_term": "kali",
148 |             "include_sites": ["linuxtracker"],
149 |             "exclude_sites": ["yts"],
150 |         },
151 |     )
152 | 
153 |     assert response.status_code == 422
154 | 
155 | 
156 | def test_advanced_search():
157 |     response = client.post(
158 |         "/api/v1/search",
159 |         json={
160 |             "search_term": "alpine",
161 |             "include_categories": ["software"],
162 |             "exclude_sites": ["eztv", "piratebay"],
163 |         },
164 |     )
165 | 
166 |     assert response.status_code == 200
167 |     assert response.json()["length"] > 0
168 | 
169 |     for listing in response.json()["data"]:
170 |         assert listing["uploader"] not in ["eztv", "piratebay"]
171 | 
172 | 
173 | def test_cache():
174 |     response_first = client.post(
175 |         "/api/v1/search",
176 |         json={
177 |             "search_term": "dune",
178 |             "include_sites": ["yts"],
179 |         },
180 |     )
181 | 
182 |     response_second = client.post(
183 |         "/api/v1/search",
184 |         json={
185 |             "search_term": "dune",
186 |             "include_sites": ["yts"],
187 |         },
188 |     )
189 | 
190 |     assert response_first.json()["cache_hit"] is False
191 |     assert response_second.json()["cache_hit"] is True
192 | 
193 | 
194 | def test_cache_timeout():
195 |     response_first = client.post(
196 |         "/api/v1/search",
197 |         json={
198 |             "search_term": "godfather",
199 |             "include_categories": [],
200 |             "exclude_categories": [],
201 |             "include_sites": ["yts"],
202 |             "exclude_sites": [],
203 |         },
204 |     )
205 | 
206 |     response_second = client.post(
207 |         "/api/v1/search",
208 |         json={
209 |             "search_term": "godfather",
210 |             "include_categories": [],
211 |             "exclude_categories": [],
212 |             "include_sites": ["yts"],
213 |             "exclude_sites": [],
214 |         },
215 |     )
216 | 
217 |     assert response_first.json()["cache_hit"] is False
218 |     assert response_second.json()["cache_hit"] is True
219 | 
220 |     sleep(cache_timeout)
221 | 
222 |     response_third = client.post(
223 |         "/api/v1/search",
224 |         json={
225 |             "search_term": "godfather",
226 |             "include_categories": [],
227 |             "exclude_categories": [],
228 |             "include_sites": ["yts"],
229 |             "exclude_sites": [],
230 |         },
231 |     )
232 | 
233 |     response_fourth = client.post(
234 |         "/api/v1/search",
235 |         json={
236 |             "search_term": "godfather",
237 |             "include_categories": [],
238 |             "exclude_categories": [],
239 |             "include_sites": ["yts"],
240 |             "exclude_sites": [],
241 |         },
242 |     )
243 | 
244 |     assert response_third.json()["cache_hit"] is False
245 |     assert response_fourth.json()["cache_hit"] is True
246 | 
247 | 
248 | # ================ utility functions =====================
249 | 
250 | 
251 | def is_valid_url(url: str) -> bool:
252 |     regex = re.compile(
253 |         r"^(?:http|ftp)s?://"  # http:// or https://
254 |         # pylint:disable=line-too-long
255 |         r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
256 |         r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
257 |         r"(?::\d+)?"  # optional port
258 |         r"(?:/?|[/?]\S+)$",
259 |         re.IGNORECASE,
260 |     )
261 |     return re.match(regex, url) is not None
262 | 
--------------------------------------------------------------------------------
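The tests above double as a specification of the HTTP API: POST /api/v1/search takes a search_term plus optional include/exclude lists, and the JSON response carries length, data, and cache_hit fields. A sketch of calling a locally running instance; the host and port are assumptions, and any HTTP client works (httpx is already a dependency):

import httpx

# Assumes the app is being served locally, e.g. via uvicorn.
response = httpx.post(
    "http://127.0.0.1:8000/api/v1/search",
    json={
        "search_term": "alpine",
        "include_categories": ["software"],
    },
    timeout=30,
)

body = response.json()
print(body["length"], "results, cache hit:", body["cache_hit"])
for listing in body["data"]:
    print(listing["uploader"], listing["name"], listing["magnet"][:60])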