├── requirements.txt ├── .github ├── FUNDING.yml ├── PULL_REQUEST_TEMPLATE.md └── ISSUE_TEMPLATE │ ├── 3_question.md │ ├── 2_enhancement_request.md │ └── 1_bug_report.md ├── .gitignore ├── LICENSE ├── README.md └── TidyIt.py /requirements.txt: -------------------------------------------------------------------------------- 1 | pynzbget 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: caronc 2 | custom: ['https://www.paypal.me/lead2gold', ] 3 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description: 2 | **Related issue (if applicable):** # 3 | 4 | 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Support Question 3 | about: Ask a question about NZBGet-TidyIt 4 | title: '' 5 | labels: 'question' 6 | assignees: '' 7 | 8 | --- 9 | 10 | :question: **Question** 11 | 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2_enhancement_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Enhancement Request 3 | about: Got a great idea? Let us know! 
4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | :bulb: **The Idea** 11 | 12 | 13 | :hammer: **Breaking Feature** 14 | 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1_bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug Report 3 | about: Report any errors and problems 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | :beetle: **Describe the bug** 11 | 12 | 13 | :bulb: **Screenshots and Logs** 14 | 15 | 16 | 17 | :computer: **Your System Details:** 18 | - OS: [e.g. RedHat v8.0] 19 | - Python Version: [e.g. Python v2.7] 20 | 21 | :crystal_ball: **Additional context** 22 | Add any other context about the problem here. 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 
31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. 
You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 
108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 
145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | __Note:__ This script was intended to be an [NZBGet](http://nzbget.net) and _Scheduling_ 2 | script for _NZBGet_. However, it also works perfectly well as a standalone script for others too! It can be easily adapted to anyone's environment. 3 | See the _Command Line_ section below for details how you can easily use this on it's own (without NZBGet). 
4 | 5 | [![Paypal](http://repo.nuxref.com/pub/img/paypaldonate.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=MHANV39UZNQ5E) 6 | 7 | 8 | TidyIt Scheduler Script 9 | ======================== 10 | TidyIt is a script designed to tidy up your video library; house cleaning 11 | one could say. It takes care of directories that once held video content, 12 | but now are either empty or contain only old meta data and other junk. 13 | 14 | This script is especially useful if you use a third party application such as 15 | Plex or KODI (XBMC) to manage your video library. It works great on Synology devices too. In fact, most third party applications and/or appliances that allow you to remove a video from your library will _only_ remove the video itself. They will not remove all the other content that surrounds it. 16 | 17 | Since the primary focus of this script is to remove content from your media 18 | library, I will not be held responsible for any irrecoverable data loss you 19 | experience. I can confirm the tool works for me, but that doesn't mean it 20 | will work for you. The good news is that the script is filled with safeguards! 21 | Thus you'd have to stray far from the default settings to damage your library. 22 | 23 | The script intentionally operates in a _log only_ mode by default unless you 24 | explicitly specify it to run differently. I encourage you to run the script 25 | in this _log only_ mode first anyway; get an idea as to what it wants to do 26 | and the files it wants to handle. If you're happy with its decisions, you can 27 | flip a switch (to the _Move_ or _Delete_ mode) and the script will begin tidying up your library as promised to you. 28 | 29 | The script operates in 3 modes: 30 | * __Preview__: This is the default option. It runs the script and just reports to the screen what it would have otherwise done. It doesn't actually do anything at all to your library though.
This might be all you need as its output can allow you to take your own actions. Alternatively, this is a great mode to run in until you get the options the way you like them. 31 | * __Delete__: This mode performs the same check the Preview mode does; however, anything flagged to be handled is removed. 32 | * __Move__: This mode _moves_ handled content into another directory (that you identify). This allows you to review what is considered junk and decide for yourself if it should be removed. This method also requires you to be responsible for managing the directory you move content to. 33 | 34 | Installation Instructions 35 | ========================= 36 | 1. Ensure you have Python v2.7 or higher installed onto your system. 37 | ```bash 38 | # Pull in dependencies: 39 | pip install -r requirements.txt 40 | ``` 41 | 2. Simply place the __TidyIt.py__ into your NZBGet scripts directory. 42 | * __NZBGet users__: you will want to place these inside of your _nzbget/scripts_ directory. Please ensure you are running _(at least)_ NZBGet v11.0 or higher. You can acquire the latest version of it from [here](http://nzbget.net/download). 43 | 44 | The Non-NZBGet users can also use this script via a cron (or simply call it 45 | from the command line) to automatically tidy their directories too. 46 | See the __Command Line__ section below for more instructions on how to do this. 47 | 48 | Command Line 49 | ============ 50 | TidyIt.py has a built-in command line interface that can be easily tied 51 | to a cron entry or can be easily called from the command line to automate 52 | the cleanup of your media libraries. 53 | 54 | Here are the switches available to you: 55 | ``` 56 | Usage: TidyIt.py [options] [scandir1 [scandir2 [...]]] 57 | 58 | Options: 59 | -h, --help Show this help message and exit. 60 | -n ENCODING, --encoding=ENCODING 61 | The system encoding to use (utf-8, ISO-8859-1, etc). 62 | The default value is 'UTF-8'.
63 | -s ENTRIES, --safe-entries=ENTRIES 64 | If a safe-entry file/dir is located within a path 65 | scanned then the path is ignored. Use safe-entry files 66 | (or dirs) to intentionally ignore directories of your 67 | choice that reside in your video library. You can 68 | specify more than one safe-entry by separating them 69 | with a comma (,). The default value(s) are 70 | '.tidysafe'. 71 | -t ENTRIES, --always-trash=ENTRIES 72 | Identify any file extensions you wish to always trash 73 | if matched. By default this is not set. You can 74 | specify more than one trash entry by separating each 75 | of them with a comma (,). 76 | -M ENTRIES, --meta-content=ENTRIES 77 | Identify any files and/or directories that should be 78 | treated as meta content. Meta content is only handled 79 | if it's the last thing within a media directory. You 80 | can specify more than one meta entry by separating 81 | each of them with a comma (,). By default the 82 | following are already defined: 'Thumbs.db', '@eaDir', 83 | '.wdtv', '.DS_Store', '.AppleDouble', '__MACOSX'. 84 | -m SIZE_IN_MB, --video-minsize=SIZE_IN_MB 85 | Specify the minimum size a video must be before it's 86 | treated as part of your collection. This value is used 87 | to differentiate between video files and sample files. 88 | This value is interpreted in MB (Megabytes) and 89 | defaults to 150 MB. 90 | -x ENTRIES, --video-extras=ENTRIES 91 | Identify the extra files you keep around with your 92 | video files as a comma delimited list. The script will 93 | scan for these files explicitly and remove them if a 94 | video file bearing the same name is not found. For 95 | this reason you do not want to specify video 96 | extensions here. This defaults to '.nfo,.??.srt,.srt,. 97 | sub,.txt,.sub,.idx,.jpg,.tbn,.nzb,.xml,.diz' if 98 | nothing is specified. 99 | -a AGE_IN_SEC, --min-age=AGE_IN_SEC 100 | Specify the minimum age a directory and/or file must 101 | be before considering it for processing.
This value is 102 | interpreted in seconds and defaults to 3600 sec(s). 103 | -c, --clean Unless this switch is specified, this script only runs 104 | in a log only mode (a dry-run) allowing you to see the 105 | actions the script would have otherwise performed. 106 | This switch can be combined with the --move-path (-p) 107 | switch to move handled content instead. 108 | -p PATH, --move-path=PATH 109 | Identify the path to place content into instead of 110 | removing it. By specifying a --move-path, the --clean 111 | (-c) switch is implied however handled content is 112 | moved instead of being removed. 113 | -k, --keep-directories 114 | Do not delete video directories during cleanup. 115 | -L FILE, --logfile=FILE 116 | Send output to the specified logfile instead of 117 | stdout. 118 | -D, --debug Debug Mode 119 | ``` 120 | 121 | Here is a simple example: 122 | ```bash 123 | # Scan your library (print only mode) 124 | python TidyIt.py /usr/share/TVShows 125 | # Happy with the results? Okay then run the script with the --clean (-c) switch: 126 | 127 | python TidyIt.py -c /usr/share/TVShows 128 | ``` 129 | 130 | You can scan multiple directories with the following command: 131 | ```bash 132 | # Scan your libraries (print only mode) 133 | python TidyIt.py /usr/share/TVShows /usr/share/Movies 134 | ``` 135 | 136 | If you don't want your content to be removed, you can just have handled content moved to another directory for your review later on. All directory paths are preserved so it won't take any rocket science to figure out where the removed content came from. It's basically a safer mode than the --clean (-c) switch provides.
137 | ```bash 138 | # Scan your libraries and move any content to be handled to the 139 | # TidyIt.Trash in your home directory (~ is supported) 140 | python TidyIt.py -p ~/TidyIt.Trash /usr/share/TVShows 141 | ``` 142 | 143 | If the script behaves as you expect it should, you can schedule it as a cron 144 | to frequently clean your libraries every day with a command such as: 145 | ```bash 146 | # $> crontab -e 147 | 0 0 * * * /path/to/TidyIt.py -c /usr/share/TVShows /usr/share/Movies 148 | ``` 149 | -------------------------------------------------------------------------------- /TidyIt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # 4 | # TidyIt Scheduler script for NZBGet 5 | # 6 | # Copyright (C) 2015-2020 Chris Caron 7 | # 8 | # This program is free software; you can redistribute it and/or modify it 9 | # under the terms of the GNU Lesser General Public License as published by 10 | # the Free Software Foundation; either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Lesser General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Lesser General Public License 19 | # along with this program. If not, see http://www.gnu.org/licenses/. 20 | # 21 | 22 | 23 | ############################################################################## 24 | ### NZBGET SCHEDULER SCRIPT ### 25 | 26 | # The script searches your media libraries and performs house keeping on it by 27 | # eliminating any meta files that have become wasted disk space since they are 28 | # no longer referenced.
29 | # 30 | # It's quite simple really, a lot of people use other tools to work with their 31 | # content such as XBMC, KODI, etc which can remove shows you don't like, etc. 32 | # But what they don't do is clean up other lingering subtitles, nfo's and other 33 | # things that may have also been associated with the deleted file. 34 | # 35 | # Info about this TidyIt NZB Script: 36 | # Author: Chris Caron (lead2gold@gmail.com). 37 | # Date: Mon, Sep 8th, 2020. 38 | # License: GPLv3 (http://www.gnu.org/licenses/gpl.html). 39 | # Script Version: 0.9.0 40 | # 41 | # A script that can tidy up your library by removing stale content 42 | # left over from removing video files from third party applications. 43 | # The script looks after cleaning off any directory containing dangling 44 | # information about a video file, but no video itself. 45 | # 46 | # NOTE: This script requires Python to be installed on your system. 47 | # 48 | # NOTE: I take absolutely no responsibility for any data loss sustained to 49 | # your media library from any mis-configuration you make. The script 50 | # works well as long as you take caution when changing settings from 51 | # their default values. 52 | 53 | ############################################################################## 54 | ### OPTIONS ### 55 | 56 | # TidyIt Mode (Preview, Delete, Move). 57 | # 58 | # Identify the TidyIt Mode you want to run in: 59 | # Preview - Log all planned TidyIt actions, but do not actually perform 60 | # them. This is the absolute safest mode to survey things with 61 | # first. 62 | # Delete - This handles all of the actual tidying of content. Items 63 | # flagged to be handled are removed in this mode. 64 | # Move - This mode is similar to Delete except content is moved to 65 | # the location you specify. This allows you to preview/review 66 | # the content and manually remove it yourself on your own. 
67 | # The content moved preserves the directory structure it was 68 | # found in (relative to the search path you specified). 69 | # 70 | # This script's primary function is to handle the content you've identified to 71 | # be tidied in some way or another. Ideally you'll set this script in Preview 72 | # mode and use the logs to determine your level of satisfaction. Once you're 73 | # comfortable with how it's behaving, you can flip the mode to one that will 74 | # actually be more productive (basically any mode but Preview). 75 | # 76 | # I take absolutely no responsibility for any loss of data sustained to your 77 | # system from misconfiguration on your part. 78 | # 79 | # NOTE: you should ALWAYS operate in 'Preview' mode first to confirm 80 | # that the script doesn't cause irreversible damage to your media library. 81 | # 82 | #Mode=Preview 83 | 84 | # Move Path. 85 | # 86 | # This argument is only required if you set your Mode to 'Move'. Identify 87 | # the path you want to move handled content to. If no path is specified then 88 | # the 'Move' mode falls gracefully back into the 'Preview' mode. The tilde 89 | # (~) can be used to expand the path in efforts to support the home 90 | # directory. 91 | # 92 | #MovePath=~/Desktop/TidyIt.Trash 93 | 94 | # Always Trash File Extensions. 95 | # 96 | # Identify the file extensions you want to always throw away if matched. 97 | # This is intentionally left blank because the script works fine without it. 98 | # But if your video library gets populated with content you'd prefer 99 | # not have around, you can identify it here. 100 | # 101 | # Use a 'question mark' (?) to identify single placeholders for 102 | # printable characters (this does not include white space). This list is not 103 | # case-sensitive. You can also use an 'asterisk' (*) to identify the regular 104 | # expression (.*) or zero or many. Keep in mind that *.nfo is the same as 105 | # writing .nfo below.
The * is automatically implied at the front. Use a 106 | # comma (,) and/or space to separate more than one entry. 107 | # Example=.zip,.r??,.z7,.0??,.1??,.2??,.3?? 108 | # 109 | #AlwaysTrash= 110 | 111 | # Meta Content. 112 | # 113 | # Identify metafile content you wish to handle when they're the only things 114 | # left in the video directory. For example... Microsoft likes putting 115 | # a Thumbs.db file which contains a thumbnail cache of any images and videos 116 | # found. These are not necessarily useless to us while our media directory 117 | # is populated with content, but are utterly useless to us if it isn't. 118 | # 119 | # We want to identify meta content here. There is some common meta information 120 | # automatically defined (and do not have to be specified): 121 | # - Thumbs.db: Microsoft Thumbnail Database. 122 | # - .DS_Store: OSX meta directory. 123 | # - .AppleDouble: OSX meta directory. 124 | # - __MACOSX: OSX meta directory. 125 | # - @eaDir: Synology meta directory. 126 | # 127 | # But you can additionally identify more entries here. Use a comma and/or 128 | # space to delimit multiple entries. 129 | # 130 | #MetaContent= 131 | 132 | # Video Libraries to Scan. 133 | # 134 | # Specify any number of directories this script can (recursively) check 135 | # delimited by a comma and/or space. ie: /home/nuxref/mystuff, /path/no3, etc 136 | # Video Libraries to Scan. 137 | # 138 | # Specify any number of directories this script can (recursively) check 139 | # delimited by a comma and/or space. ie: /home/nuxref/mystuff, /path/no3, etc 140 | # For windows users, you can specify: C:\My Downloads, \\My\Network\Path, etc 141 | # 142 | #VideoPaths= 143 | 144 | # Video Extra File Extensions. 145 | # 146 | # Identify the extra files you keep around with your video files. The script 147 | # will scan for these files explicitly and remove them if a video file 148 | # bearing the same name is not found.
For this reason you do not want to 149 | # specify video extensions here. 150 | # 151 | # You can use a 'question mark' (?) to identify single placeholders for 152 | # printable characters (this does not include white space). This list is not 153 | # case-sensitive. You can also use an 'asterisk' (*) to identify the regular 154 | # expression (.*) or zero or many. Keep in mind that *.nfo is the same as 155 | # writing .nfo below. The * is automatically implied at the front. Use a 156 | # comma (,) and/or space to separate more than one entry. 157 | # The following are already implied and do not need to be specified: 158 | # .nfo,.??.srt,.srt,.txt,.nfo,.srr,.sfv,.sub,.idx,.jpg,.tbn,.xml,.jpg,.jpeg 159 | # Example=.???.forced.srt,.nzb,.xml 160 | # 161 | #VideoExtras=.???.forced.srt 162 | 163 | # Minimum Video Size. 164 | # 165 | # Identify the minimum size (in MB) a video file must be before it's 166 | # treated as part of your collection (and not a sample or extra). 167 | # If you want every video file to be considered equal, simply set 168 | # this value to zero (0). 169 | # 170 | #VideoMinSize=150 171 | 172 | # Minimum Processing Age. 173 | # 174 | # Identify the minimum age (in seconds) a file and/or directory can be 175 | # before it's processed. This prevents this script from invading a directory 176 | # that may be being concurrently managed by another application around the 177 | # same time. Anything older than 10 Min (600) is relatively safe. 178 | # The default is 3600 (1 hour) as an extra precaution. 179 | # Set this value to zero (0) if you don't want to factor in 180 | # the age of the files and directories scanned. 181 | # 182 | #ProcessMinAge=3600 183 | 184 | # File extensions for video files. 185 | # 186 | # Only files with these extensions are considered a video. Extensions must 187 | # be separated with commas.
188 | # Example=.mkv,.avi,.divx,.xvid,.mov,.wmv,.mp4,.mpg,.mpeg,.vob,.iso 189 | # 190 | #VideoExtensions=.mkv,.avi,.divx,.xvid,.mov,.wmv,.mp4,.mpg,.mpeg,.vob,.iso 191 | 192 | # Tidy Safe Entries. 193 | # 194 | # No doubt there will be a situation where you will point the tidy script 195 | # to a series of directories that you want to intentionally avoid 196 | # removing. Use a comma to identify any content that immediately makes 197 | # a directory safe from removal if found. You can specify directory names or 198 | # filenames. Simply use a comma and/or space to delimit multiple safe 199 | # entries. 200 | # 201 | #SafeEntries=.notidy 202 | 203 | # My Systems File Encoding (UTF-8, UTF-16, ISO-8859-1, ISO-8859-2). 204 | # 205 | # All systems have their own encoding; here is a loose guide you can use 206 | # to determine what encoding you are (if you're not sure): 207 | # - UTF-8: This is the encoding used by most Linux/Unix filesystems. Just 208 | # check the global variable $LANG to see if that's what you are. 209 | # - UTF-16: This is the encoding usually used by OS/X systems and NTFS. 210 | # - ISO-8859-1: Also referred to as Latin-1; Microsoft Windows used this 211 | # encoding for years (in the past), and still does in some 212 | # cases. It supports the English, Spanish, and French language 213 | # character sets. 214 | # - ISO-8859-2: Also referred to as Latin-2; It supports Czech, German, 215 | # Hungarian, Polish, Romanian, Croatian, Slovak, and 216 | # Slovene character sets. 217 | # 218 | # If you wish to add another encoding, just email me and I'll add it. 219 | # All files that are downloaded will be written to your filesystem using 220 | # the same encoding your operating system uses. Since there is no way 221 | # to detect this (yet), by specifying it here, you can make it possible 222 | # to handle files with the extended character sets. 223 | # 224 | #SystemEncoding=UTF-8 225 | 226 | # Keep Directories.
#
# Set this to Yes if you want this script to avoid cleaning up directories
# during its tidying phases. This option will not apply to meta
# directories though.
#
#KeepDirectories=No

# Enable debug logging (yes, no).
#
# If you experience a problem, you can bet I'll have a much easier time solving
# it for you if your logs include debugging. This can be made possible by
# flipping this flag here.
#
#Debug=no

# Issue a tidy of any directories you defined above here:
#DoTidy@Tidy Defined Media Paths

### NZBGET SCHEDULER SCRIPT                                                ###
##############################################################################

import re
from os.path import join
from os import listdir
from os.path import basename
from os.path import abspath
from os.path import dirname
from os.path import isdir
from os.path import isfile
from os.path import exists
from os import sep as os_separator

from os import unlink
from os import makedirs
from shutil import rmtree
from shutil import move

from stat import ST_MTIME
from stat import ST_SIZE

# This is required if the below environment variables
# are not included in your environment already
import sys

# stat is used to read the modification time and size of scanned entries
from os import stat

# Script dependencies identified below
from datetime import timedelta
from datetime import datetime

# pynzbget Script Wrappers
from nzbget import SKIP_DIRECTORIES
from nzbget import SchedulerScript
from nzbget import EXIT_CODE
from nzbget import SCRIPT_MODE
from nzbget.Utils import tidy_path
from nzbget.Utils import os_path_split

# Meta Information
# File names matched here are treated as media-library artwork/metadata.
# All patterns are raw strings so that `\.` reaches the regex engine as an
# escaped dot instead of being an invalid Python string escape.
MEDIAMETA_FILES_RE = (
    re.compile(
        r'^(backdrop|banner|fanart|folder|poster|season-specials-poster)'
        r'\.jpe?g',
        re.IGNORECASE,
    ),
    re.compile(
        r'^season([0-9]+)?(-(specials|banner|poster))?(-poster)?'
        r'\.(tbn|jpe?g)',
        re.IGNORECASE,
    ),
    # The dot is escaped so only a literal '.' separates name and extension
    re.compile(r'^tvshow\.nfo', re.IGNORECASE),
    re.compile(r'^series\.xml', re.IGNORECASE),
)

# Alike files are files that exist next to the video file in question.
# In cases where the video file is missing, but its alike file is found,
# we can go ahead and clean up.
#
# Every expression MUST expose a named group called `filename`; the library
# scan reads it back with match.group('filename') to pair an alike file with
# its video.
VIDEO_ALIKE_FILES_RE = (
    # Subtitles
    re.compile(r'^(?P<filename>.*)\.[A-Za-z]{2}\.srt$', re.IGNORECASE),
    # Thumbnails
    re.compile(r'^(?P<filename>.*)-thumb\.(jpe?g)$', re.IGNORECASE),
    # Meta
    re.compile(
        r'^(?P<filename>.*)\.(sfv|txt|nfo|tbn|jpe?g|srt|srr|sub|idx)$',
        re.IGNORECASE,
    ),
)

# Meta Directory(ies)
# Sometimes the matching meta information is actually within a subdirectory.
# If mismatched data is found in directories identified here, their match
# is searched against content in the parent directory too just to be safe
# before assuming the content should be removed.
METADIRS = ('metadata', )

# A tuple that contains all of the directories and/or files that are always
# ignored regardless and can safely be removed if found within a directory.
OS_METADATA_ENTRIES = (
    # Microsoft Thumbnails
    'Thumbs.db',
    # Synology Device Meta Directory
    '@eaDir',
    # Western Digital Meta Directory
    '.wdtv',
) + SKIP_DIRECTORIES

# A list of compiled regular expressions identifying files to not parse if
# found in a directory. This does not make them safe from removal though!
#
# In the event that only ignored content remains in the directory, it will
# be automatically removed. This list was really only created to provide
# an 'exception' to what is considered valid.
# Hence, it's easier to
# identify *.mkv and then 'exclude' the list below as valid.
#
# This list is also backwards checked against ALIKE matches to make sure
# we don't pick from something on this list.
#
# The patterns are raw strings and carry a properly named `filename` group;
# a bare `(?P.*)` is rejected by re.compile().
IGNORE_FILELIST_RE = (
    # Sample Videos should not be included in our main listing
    re.compile(r'^(?P<filename>.*)[.-]sample\.[^.]+$', re.IGNORECASE),
    re.compile(r'^sample[.-](?P<filename>.*)\.[^.]+$', re.IGNORECASE),
)


class TIDYIT_MODE(object):
    """The actions the script may take on content selected for tidying."""
    # Delete content set to be tidied
    DELETE = "Delete"
    # Move content to the path specified instead of deleting it
    MOVE = "Move"
    # Do nothing; just preview what was intended to be tidied
    PREVIEW = "Preview"


# TidyIt Modes
TIDYIT_MODES = (
    TIDYIT_MODE.DELETE,
    TIDYIT_MODE.PREVIEW,
    TIDYIT_MODE.MOVE,
)
# Default in a Read-Only Mode; It's the safest way!
TIDYIT_MODE_DEFAULT = TIDYIT_MODE.PREVIEW

DEFAULT_VIDEO_EXTENSIONS = \
    '.mkv,.avi,.divx,.xvid,.mov,.wmv,.mp4,.mpg,.mpeg,.vob,.iso'

DEFAULT_TIDYSAFE_ENTRIES = \
    '.tidysafe'

# Always default to nothing (forcing it back to a preview mode)
DEFAULT_MOVE_PATH = ''


class TidyCode(object):
    """ Codes returned by tidy_library() function that provide instructions
        as to what to do next
    """
    REMOVE = 0
    IGNORE = -1


# A collection of all the tidy_library() return codes
TIDY_CODES = (TidyCode.REMOVE, TidyCode.IGNORE)

# The number of seconds a matched directory/file has to have aged before it
# is processed further. This prevents the script from removing content
# that may be being processed 'now'. All content must be older than this
# to be considered. This value is represented in seconds.
DEFAULT_MATCH_MINAGE = 3600

# The number of MegaBytes the detected video must be (with respect
# to its filesize).
# If it is less than this value, then it is
# presumed to be a sample.
DEFAULT_VIDEO_MIN_SIZE_MB = 150

# System Encodings
DEFAULT_ENCODINGS = (
    # Most Linux Systems
    'UTF-8',
    # NTFS/OS-X
    'UTF-16',
    # Most French/English/Spanish Windows Systems
    'ISO-8859-1',
    # Czech, German, Hungarian, Polish, Romanian,
    # Croatian, Slovak, Slovene.
    'ISO-8859-2',
)

# Filesystem Encoding
DEFAULT_SYSTEM_ENCODING = 'UTF-8'

# Default Keep Directory Switch
DEFAULT_KEEP_DIRECTORY_SWITCH = 'No'


class TidyItScript(SchedulerScript):
    """A Media Library Tidying tool written for NZBGet
    """

    def _re_str(self, re_str):
        """
        Support custom RE provided by this script where * becomes .*
        and ? becomes .

        The characters `.`, `^`, `(` and `)` are escaped so they match
        literally; every other character (including the `|` callers use to
        join several entries) is passed through untouched.

        Returns the translated string; nothing is compiled here.
        """
        translation = {
            '.': r'\.',
            '^': r'\^',
            # '(' is escaped together with ')'; escaping only the closing
            # parenthesis always produced an unbalanced expression
            '(': r'\(',
            ')': r'\)',
            '?': '.',
            '*': '.*',
        }
        # One pass over the input; a callable replacement avoids any
        # template-escape processing of the substituted text
        return re.sub(
            r'[.^()?*]',
            lambda hit: translation[hit.group(0)],
            re_str,
        )

    def _move_target(self, path, depth, is_taken):
        """
        Build the destination for `path` below self.move_path, keeping the
        last `depth` components of `path`.

        `is_taken` is a predicate (exists/isfile); while it reports the
        destination as occupied a numeric `.NNNNN` suffix is appended.
        """
        target = join(
            self.move_path,
            os_separator.join(os_path_split(path)[-depth:]),
        )
        if is_taken(target):
            index = 1
            candidate = '%s.%.5d' % (target, index)
            while is_taken(candidate):
                index += 1
                candidate = '%s.%.5d' % (target, index)
            target = candidate
        return target

    def _prepare_parent(self, target):
        """
        Make sure the directory that will hold `target` exists.

        Returns False (after logging) when it could not be created.
        """
        parent = dirname(target)
        if isdir(parent):
            return True
        try:
            makedirs(parent)
        except Exception as e:
            self.logger.error(
                'Could not create move path: %s' % parent,
            )
            self.logger.debug('makedirs() Exception %s' % str(e))
            return False
        return True

    def _handle_file(self, path, depth):
        """Delete, move or preview a single (non-directory) entry."""
        if self.mode == TIDYIT_MODE.DELETE:
            try:
                unlink(path)
                self.logger.info('Removed FILE: %s' % path)
            except Exception as e:
                self.logger.error('Could not removed FILE: %s' % path)
                self.logger.debug('unlink() Exception %s' % str(e))
                return False

        elif self.mode == TIDYIT_MODE.MOVE:
            # Anything already at the destination counts as a duplicate
            target = self._move_target(path, depth, exists)
            if not self._prepare_parent(target):
                # Without its parent directory the move can not succeed
                return False
            try:
                move(path, target)
                self.logger.info('Moved FILE: %s' % path)
            except Exception as e:
                self.logger.error('Could not move FILE: %s' % path)
                self.logger.debug('Move Exception %s' % str(e))
                return False

        else:
            self.logger.info('PREVIEW ONLY: Handle FILE: %s' % path)

        return True

    def _handle_directory(self, path, depth):
        """Delete, move or preview a directory."""
        if self.mode == TIDYIT_MODE.DELETE:
            try:
                rmtree(path)
                self.logger.info('Removed DIRECTORY: %s' % path)
            except Exception as e:
                self.logger.error('Could not remove DIRECTORY: %s' % path)
                self.logger.debug('rmtree() Exception %s' % str(e))
                return False

        elif self.mode == TIDYIT_MODE.MOVE:
            # Directories are usually already handled because they contain
            # files and have already been created and set up... but just to
            # be bulletproof; this handles situations where a file exists
            # where a directory should be.
            target = self._move_target(path, depth, isfile)
            if not self._prepare_parent(target):
                return False

            if not isdir(target):
                try:
                    move(path, target)
                    self.logger.info('Moved DIRECTORY: %s' % path)
                except Exception as e:
                    self.logger.error('Could not move DIRECTORY: %s' % path)
                    self.logger.debug('Move Exception %s' % str(e))
                    return False

            # The destination directory exists at this point; whatever is
            # left of the source directory is removed.
            # NOTE(review): when the destination already existed the source
            # is removed without being copied; this relies on its content
            # having been handled earlier - confirm this is intended.
            if isdir(path):
                try:
                    rmtree(path)
                    self.logger.info(
                        'Removed (already backed up) ' +
                        'DIRECTORY: %s' % path,
                    )
                except Exception as e:
                    self.logger.error(
                        'Could not remove (already backed up) ' +
                        'DIRECTORY: %s' % path,
                    )
                    self.logger.debug('rmtree() Exception %s' % str(e))
                    return False
        else:
            self.logger.info('PREVIEW ONLY: Handle DIRECTORY: %s' % path)

        return True

    def _handle(self, path, depth):
        """
        A Simple wrapper to handle content in addition to logging it.

        Depending on self.mode the entry at `path` is deleted, moved below
        self.move_path (keeping its last `depth` path components) or only
        reported (preview).

        Returns True on success (always in preview mode) and False when the
        filesystem operation failed.
        """
        if isdir(path):
            return self._handle_directory(path, depth)
        return self._handle_file(path, depth)

    def tidy_library(self, path, extensions, extras, minsize, minage,
                     keep_dirs, *args, **kwargs):
        """
          - Recursively scan a library path and decide what can be tidied.

          - `extensions` is a compiled regex identifying video files and
            `extras` a list of compiled regexes (each exposing a `filename`
            group) identifying files that belong next to a video.
          - Video files smaller than `minsize` bytes are not considered
            valid; entries modified within the last `minage` seconds cause
            the directory to be left alone.
          - If a directory contains another directory that is kept, it will
            never be removed.
          - A meta directory (see METADIRS) is not considered as part of
            this rule.
          - The directory passed into the function for the first time
            (depth 1) will 'never' be removed regardless of scanned outcome.
          - if keep_dirs is set to True, then directories are NOT removed.

        Returns TidyCode.REMOVE when the (nested) directory holds nothing
        worth keeping, otherwise TidyCode.IGNORE.
        """
        if not isdir(path):
            # Not a directory? then return a value that will prevent
            # the file/block from being removed
            return TidyCode.IGNORE

        # Internal Tracking of Directory Depth
        # A depth of 1 is a 'safe' directory that will never be removed
        current_depth = kwargs.get('__current_depth', 1)

        if current_depth == 1:
            self.logger.info('Scanning %s' % path)

        # Change to absolute path
        path = abspath(path)
        ref_time = datetime.now() - timedelta(seconds=minage)

        # Check absolute path date (because we don't want to
        # process anything in it if it was touched recently)
        try:
            stat_obj = stat(path)
            mtime = datetime.fromtimestamp(stat_obj[ST_MTIME])
            if current_depth > 1 and mtime >= ref_time:
                # We're done; directory is too new
                self.logger.debug(
                    'Skipping %s; modified less than %ds ago.' % (
                        path,
                        minage,
                    ))
                return TidyCode.IGNORE

        except OSError:
            # The directory suddenly became inaccessible
            self.logger.warning(
                'Path %s is inaccessible.' % path,
            )
            # Since the directory is missing (or inaccessible
            # due to permissions) return an IGNORE on it
            return TidyCode.IGNORE

        except ValueError:
            # datetime could not parse the time correctly
            # Newly created directories won't have this problem
            # we can move along
            pass

        def is_ignored(name):
            """True when `name` matches one of IGNORE_FILELIST_RE."""
            return any(
                regex.search(name) is not None
                for regex in IGNORE_FILELIST_RE)

        # First check for the goods; we may not have to do
        # further processing otherwise
        # NOTE(review): get_files() is provided by pynzbget; the code below
        # relies on it returning a dict keyed by path whose values carry a
        # 'filesize' entry - confirm against the library.
        candidates = self.get_files(
            path,
            regex_filter=extensions,
            min_depth=1, max_depth=1,
            fullstats=True,
            skip_directories=False,
        )

        # Apply Filters To Detect actual valid video files
        candidates = [
            k for (k, v) in candidates.items() if v['filesize'] >= minsize]

        valid_paths = []
        # reversed() keeps the ordering the original pop()-based loop had
        for _path in reversed(candidates):
            if isfile(_path) and is_ignored(basename(_path)):
                self.logger.debug(
                    'Skipping - Ignored file: %s' % basename(_path))
                continue
            valid_paths.append(_path)

        if len(valid_paths):
            # at least one valid file was found in this directory
            # but it doesn't rule out the possibility of
            # movie files existing in further sub directories.
            # so we flag this as a safe dir and keep processing

            # The easiest way to mark a directory safe is to just
            # toggle the current_depth to one (1).
            current_depth = 1

        # Get All Entries
        try:
            dirents = [d for d in listdir(path) if d not in ('..', '.')]
        except OSError:
            # The directory can not be listed; leave it untouched
            self.logger.warning(
                'Path %s is inaccessible.' % path,
            )
            return TidyCode.IGNORE

        tidylist = []
        remove_if_empty = []
        while len(dirents):

            # Pop directory entry
            dirent = dirents.pop()

            # Build absolute path with it
            fullpath = join(path, dirent)

            # Check for TidySafe Content
            if dirent in self.tidysafe_entries:
                # it was found; we do not proceed any further
                self.logger.debug(
                    'Safe entry %s found in %s' % (dirent, path))
                return TidyCode.IGNORE

            if dirent in self.meta_entries:
                # METADATA is only cannon-fodder if it's determined
                # the directory should be removed
                self.logger.debug(
                    'Potential handling (os meta data): %s' % fullpath)
                remove_if_empty.append(fullpath)
                continue

            try:
                stat_obj = stat(fullpath)
                mtime = datetime.fromtimestamp(stat_obj[ST_MTIME])
                if mtime >= ref_time:
                    # We're done; entry is too new
                    self.logger.debug(
                        'Skipping %s; %s was modified less than %ds ago.' % (
                            path,
                            dirent,
                            minage,
                        ))
                    return TidyCode.IGNORE

            except OSError:
                # The entry suddenly became inaccessible
                self.logger.warning(
                    'Path %s became inaccessible.' % fullpath,
                )
                # Since the entry is missing, skip it letting
                # the remaining entries decide the directory's fate
                continue

            except ValueError:
                # datetime could not parse the time correctly
                # Newly created directories won't have this problem
                # we can move along
                pass

            # Store Filesize
            size = stat_obj[ST_SIZE]

            if isdir(fullpath):
                if dirent in METADIRS:
                    if len(valid_paths) == 0:
                        # Meta content is useless to us if the directory
                        # has no valid video
                        tidylist.append(fullpath)
                        self.logger.debug(
                            'Planned handling (metadata): %s' % fullpath)
                    else:
                        # Meta data exists, the best way to tackle this is
                        # to append it to the current dirent list to be
                        # processed
                        dirents.extend([
                            join(dirent, d) for d in listdir(fullpath)
                            if d not in ('..', '.')])

                    # Next File
                    continue

                # Recursively continue scanning.  The depth is handed over
                # through a dict so its key is exactly the string read by
                # kwargs.get('__current_depth') above.
                code = self.tidy_library(
                    path=fullpath,
                    extensions=extensions,
                    extras=extras,
                    minsize=minsize,
                    minage=minage,
                    keep_dirs=keep_dirs,
                    # Internal
                    **{'__current_depth': current_depth + 1}
                )

                if code == TidyCode.IGNORE:
                    # Add this directory back to the
                    # valid paths to prevent it from being
                    # removed later
                    valid_paths.append(fullpath)

                elif code == TidyCode.REMOVE:
                    # We got instructions to remove
                    # the directory
                    if not keep_dirs:
                        tidylist.append(fullpath)
                        self.logger.debug(
                            'Planned handling (dir): %s' % fullpath)

                # Next File
                continue

            if isfile(fullpath):
                if extensions.search(fullpath):
                    if len(valid_paths) == 0:
                        # We found a video file in a situation where
                        # there were no 'valid' ones; This is caused by the
                        # filesize not meeting the defined requirements or it
                        # was defined in the IGNORE_FILELIST and it's the
                        # last remaining content found in the directory
                        tidylist.append(fullpath)
                        self.logger.debug(
                            'Planned handling (invalid video): %s' % fullpath)
                    # Next File
                    continue

                # Meta Information
                if any(regex.search(basename(dirent))
                       for regex in MEDIAMETA_FILES_RE):
                    # Add file to tidy if empty queue
                    self.logger.debug(
                        'Potential handling (meta data): %s' % fullpath)
                    remove_if_empty.append(fullpath)
                    # Next File
                    continue

                # Filesize
                if size == 0:
                    # Zero byte files are never good
                    tidylist.append(fullpath)
                    self.logger.debug(
                        'Planned handling (zero byte file): %s' % fullpath)
                    continue

                # Match against always trash items (if configured to do so)
                elif self.always_trash is not None and \
                        self.always_trash.search(fullpath):
                    # we found a file we flagged to always be trashed when
                    # matched
                    tidylist.append(fullpath)
                    self.logger.debug(
                        'Planned handling (marked for trash): %s' % fullpath)
                    # Next File
                    continue

                if len(valid_paths) > 0:
                    # Handle alike files
                    found = False
                    for afile in extras:
                        match = afile.match(fullpath)
                        if not match:
                            continue

                        # We found an expression matching the file in
                        # question; now decide its fate
                        alike_name = basename(match.group('filename'))
                        for entry in valid_paths:
                            # Match against a valid_path
                            if basename(entry).startswith(alike_name) and \
                                    not is_ignored(basename(entry)):
                                # We have a good match!
                                self.logger.debug('%s belongs to %s' % (
                                    dirent,
                                    basename(entry),
                                ))
                                found = True
                                break

                            # No match if we get here, check the current
                            # directory name against the file:
                            if basename(dirname(entry)).\
                                    startswith(alike_name):
                                # We have a good match!
                                self.logger.debug(
                                    '%s belongs to (dir) %s' % (
                                        dirent,
                                        basename(dirname(entry)),
                                    ))
                                found = True
                                break

                        if not found:
                            # We didn't find anything on an
                            # Alike match
                            tidylist.append(fullpath)
                            self.logger.debug(
                                'Planned handling (no alike match): %s' %
                                fullpath)
                            # Trip Found flag since we've
                            # already handled this file
                            found = True
                            break

                    if found:
                        # Next File since we handled or at
                        # least matched an Alike file
                        continue

            # If we make it to the end, we scanned a file
            # that does not meet filtering criteria

            # This is like a safe file. We don't know what it is; so we
            # want to avoid destroying something we shouldn't
            self.logger.debug(
                'Unhandled entry found: %s (safe-guarded)' % fullpath)
            return TidyCode.IGNORE

        if len(dirents) == 0 and len(valid_paths) == 0:
            # we successfully handled every file/dir
            # in the current directory and there were no
            # valid files found in the list

            # Append the remove_if_empty list in this
            # scenario
            tidylist.extend(remove_if_empty)

        # We only tidy the parent if all of its children
        # are gone
        for entry in tidylist:
            self._handle(entry, current_depth)

        if len(dirents) + len(valid_paths):
            # We have a media directory worth keeping
            return TidyCode.IGNORE

        if current_depth > 1:
            return TidyCode.REMOVE

        return TidyCode.IGNORE

    def tidy(self):
        """All of the core cleanup magic happens here.

        Reads the script settings, compiles the regular expressions built
        from them and runs tidy_library() over every configured video path.

        Returns False when the settings are unusable, otherwise None.
        """

        if not self.validate(keys=(
                'Mode',
                'MovePath',
                'VideoPaths',
                'AlwaysTrash',
                'MetaContent',
                'VideoMinSize',
                'ProcessMinAge',
                'VideoExtensions',
                'SafeEntries',
                'VideoExtras',
                'SystemEncoding')):

            return False

        # Fix mode to object (self.*)
        self.mode = self.get('Mode', TIDYIT_MODE_DEFAULT)
        if self.mode not in TIDYIT_MODES:
            self.logger.error(
                'The specified mode "%s" is not supported.' % self.mode)
            return False

        # Fix tidy-safe entries to object (self.*)
        self.tidysafe_entries = self.parse_list(
            self.get('SafeEntries', DEFAULT_TIDYSAFE_ENTRIES))

        # Store Move Path
        self.move_path = tidy_path(self.get('MovePath', DEFAULT_MOVE_PATH))
        if self.move_path:
            # Convert to absolute path if possible
            self.move_path = abspath(self.move_path)

        if self.mode == TIDYIT_MODE.MOVE and not self.move_path:
            # Fall back to preview mode if no move path is specified
            self.mode = TIDYIT_MODE.PREVIEW
            self.logger.warning(
                'MovePath not specified; falling back to Preview Mode.')

        # Remaining Environment Variables
        video_extensions = self.parse_list(
            self.get('VideoExtensions', DEFAULT_VIDEO_EXTENSIONS))
        try:
            # The size is configured in MB; the scan compares bytes
            video_minsize = int(
                self.get('VideoMinSize', DEFAULT_VIDEO_MIN_SIZE_MB)) * 1048576
            minage = int(self.get('ProcessMinAge', DEFAULT_MATCH_MINAGE))
        except (ValueError, TypeError):
            self.logger.error(
                'VideoMinSize and ProcessMinAge must be whole numbers.')
            return False

        paths = self.parse_path_list(self.get('VideoPaths'))
        self.meta_entries = self.parse_list(
            self.get('MetaContent', OS_METADATA_ENTRIES))

        # Directory Handling
        keep_dirs = self.parse_bool(
            self.get('KeepDirectories', DEFAULT_KEEP_DIRECTORY_SWITCH))

        # Create Unique List of Meta Entries
        self.meta_entries = set(
            list(self.meta_entries) + list(OS_METADATA_ENTRIES))
        self.logger.debug(
            'Meta Entries set to: "%s"' % '", "'.join(self.meta_entries))

        # Extra Managing - build regular expressions based on input
        video_extras = self.parse_list(self.get('VideoExtras', ''))

        # Start ourselves off with an extras list
        extras = list(VIDEO_ALIKE_FILES_RE)

        if len(video_extras):
            _extras = '%s' % '|'.join(video_extras)
            # The `filename` group is what tidy_library() reads back
            _extras = '^(?P<filename>.*)(' + self._re_str(_extras) + ')$'
            try:
                compiled_extras = re.compile(
                    '(%s)$' % _extras, re.IGNORECASE)
                self.logger.debug('Compiled regex "(%s)"' % _extras)
                # insert our extras to the head of our list
                extras.insert(0, compiled_extras)
            except re.error:
                self.logger.warning(
                    'Invalid regular expression: "(%s)$"' % _extras,
                )

        if not len(video_extensions):
            self.logger.error('No video extensions were specified.')
            return False

        _extensions = r'%s' % '|'.join(video_extensions)
        _extensions = self._re_str(_extensions)
        try:
            extensions = re.compile(r'(%s)$' % _extensions, re.IGNORECASE)
            self.logger.debug('Compiled regex "(%s)$"' % _extensions)
        except re.error:
            self.logger.warning(
                'Invalid regular expression: "(%s)$"' % _extensions,
            )
            # Without a usable video expression nothing can be scanned
            return False

        # Trash Managing - build regular expressions based on input
        always_trash_entries = self.parse_list(self.get('AlwaysTrash', []))

        # By Default; Always Trash is Disabled (Safer this way)
        self.always_trash = None
        if len(always_trash_entries):
            _always_trash = '%s' % '|'.join(always_trash_entries)
            _always_trash = self._re_str(_always_trash)
            try:
                self.always_trash = re.compile(
                    '(%s)$' % _always_trash, re.IGNORECASE)
                self.logger.debug(
                    'Compiled "Always Trash" regex "(%s)$"' % _always_trash)
            except re.error:
                # Disable option (Safer this way)
                self.logger.warning(
                    'Invalid "Always Trash" regular expression: "(%s)$"' %
                    _always_trash,
                )

        for path in paths:
            self.tidy_library(
                path,
                extensions=extensions,
                extras=extras,
                minsize=video_minsize,
                minage=minage,
                keep_dirs=keep_dirs,
            )

        # Nothing fetched, nothing gained or lost
        return None

    def scheduler_main(self, *args, **kwargs):
        """Scheduler
        """

        return self.tidy()

    def action_dotidy(self, *args, **kwargs):
        """
        Execute the DoTidy Test Action
        """
        # run a single instance
        self.tidy()
        return True

    def main(self, *args, **kwargs):
        """CLI
        """
        return self.tidy()


# Call your script as follows:
if __name__ == "__main__":
    from sys import exit
    from optparse import OptionParser

    # Support running from the command line
    usage = "Usage: %prog [options] [scandir1 [scandir2 [...]]]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-n",
        "--encoding",
        dest="encoding",
        help="The system encoding to use (utf-8, ISO-8859-1, etc)." +
        " The default value is '%s'" % DEFAULT_SYSTEM_ENCODING + ".",
        metavar="ENCODING",
    )
    parser.add_option(
        "-s",
        "--safe-entries",
        dest="safeentries",
        help="If a safe-entry file/dir is located within a path scanned " +
        "then the path is ignored.  Use safe-entry files (or dirs) " +
        "to intentionally ignore directories of your choice that " +
        "reside in your video library. You can specify more then one " +
        "safe-entry by separating them with a comma (,). " +
        "The default value(s) are '%s'" % (DEFAULT_TIDYSAFE_ENTRIES) +
        ".",
        metavar="ENTRIES",
    )
    parser.add_option(
        "-t",
        "--always-trash",
        dest="alwaystrash",
        help="Identify any file extensions you wish to always trash " +
        "if matched. By default this is not set. " +
        "You can specify more then one trash entry by " +
        "separating each of them with a comma (,). ",
        metavar="ENTRIES",
    )
    parser.add_option(
        "-M",
        "--meta-content",
        dest="metacontent",
        help="Identify any files and/or directories that should be " +
        "treated as meta content. Meta content is only handled " +
        "if it's the last thing within a media directory. " +
        "You can specify more then one meta entry by " +
        "separating each of them with a comma (,). " +
        "By Default the following are already defined: " +
        "'%s'." % "', '".join(OS_METADATA_ENTRIES),
        metavar="ENTRIES",
    )
    parser.add_option(
        "-m",
        "--video-minsize",
        dest="video_minsize",
        help="Specify the minimum size a video must be before it's " +
        "treated as part of your collection. This value is used to " +
        "diffentiate between video file and samples files. This value " +
        "is interpreted in MB (Megabytes) and defaults to %d MB." %
        DEFAULT_VIDEO_MIN_SIZE_MB,
        metavar="SIZE_IN_MB",
    )
    parser.add_option(
        "-x",
        "--video-extras",
        dest="video_extras",
        help="Identify the additional extensions you want to keep with your " +
        "video files as a comma delimited. The script will scan for these " +
        "files explicitly and remove them if a video file bearing the " +
        "same name is not found. For this reason you do not want to " +
        "specify video extensions here. eg. .nzb, .???.forced.srt",
        metavar="ENTRIES",
    )
    parser.add_option(
        "-a",
        "--min-age",
        dest="minage",
        help="Specify the minimum age a directory and/or file must be " +
        "before considering it for processing. This value " +
        "is interpreted in seconds and defaults to %d sec(s)." %
        DEFAULT_MATCH_MINAGE,
        metavar="AGE_IN_SEC",
    )
    parser.add_option(
        "-c",
        "--clean",
        dest="clean",
        action="store_true",
        help="Unless this switch is specified, this script only runs in a " +
        "log only mode (a dry-run) allowing you to see the actions the " +
        "script would have otherwise performed. This switch can be " +
        "combined with the --move-path (-p) switch to move handled " +
        "instead.",
    )
    parser.add_option(
        "-p",
        "--move-path",
        dest="move_path",
        help="Identifiy the path to place content into instead of " +
        "removing it. By specifying a --move-path, the --clean (-c) " +
        "switch is implied however handled content is moved instead " +
        "of being removed.",
        metavar="PATH",
    )
    parser.add_option(
        "-k",
        "--keep-directories",
        dest="keep_dir",
        action="store_true",
        help="Do not delete video directories during cleanup."
    )
    parser.add_option(
        "-L",
        "--logfile",
        dest="logfile",
        help="Send output to the specified logfile instead of stdout.",
        metavar="FILE",
    )
    parser.add_option(
        "-D",
        "--debug",
        action="store_true",
        dest="debug",
        help="Debug Mode",
    )
    options, _args = parser.parse_args()

    logger = options.logfile
    if not logger:
        # True = stdout
        logger = True
    debug = options.debug

    script_mode = None
    videopaths = ''
    if len(_args):
        # Support command line arguments too
        videopaths = ','.join(_args)

    # We always enter this part of the code, so we have to be
    # careful to only set() values that have been set by an
    # external switch. Otherwise we use defaults or what might
    # already be resident in memory (environment variables).
    _encoding = options.encoding
    _video_minsize = options.video_minsize
    _video_extras = options.video_extras
    _minage = options.minage
    _clean = options.clean
    _move_path = options.move_path
    _safeentries = options.safeentries
    _alwaystrash = options.alwaystrash
    _metacontent = options.metacontent
    _keep_dir = options.keep_dir

    if _clean or _move_path or videopaths:
        # By specifying one of the above, we know for sure that the
        # user is running this script manually from the command line
        # as a standalone script.

        # Setting Script Mode to NONE forces main() to execute
        # which is where the code for the cli() is defined
        script_mode = SCRIPT_MODE.NONE

    script = TidyItScript(
        logger=logger,
        debug=debug,
        script_mode=script_mode,
    )

    if _move_path:
        # Move always trumps _clean
        script.set('Mode', TIDYIT_MODE.MOVE)
        script.set('MovePath', _move_path)

    elif _clean:
        script.set('Mode', TIDYIT_MODE.DELETE)

    if _minage:
        try:
            _minage = str(abs(int(_minage)))
            script.set('ProcessMinAge', _minage)
        except (ValueError, TypeError):
            script.logger.error(
                'An invalid `minage` (%s) was specified.' % (_minage)
            )
            exit(EXIT_CODE.FAILURE)

    if _video_minsize:
        try:
            _video_minsize = str(abs(int(_video_minsize)))
            script.set('VideoMinSize', _video_minsize)
        except (ValueError, TypeError):
            script.logger.error(
                'An invalid `video_minsize` (%s) was specified.' %
                (_video_minsize)
            )
            exit(EXIT_CODE.FAILURE)

    if _safeentries:
        script.set('SafeEntries', _safeentries)

    if _alwaystrash:
        script.set('AlwaysTrash', _alwaystrash)

    if _keep_dir:
        script.set('KeepDirectories', 'Yes')

    if _metacontent:
        script.set('MetaContent', _metacontent)

    if _encoding:
        script.set('SystemEncoding', _encoding)

    if _video_extras:
        # Set our video extras
        script.set('VideoExtras', _video_extras)

    if not script.get('VideoPaths') and videopaths:
        if not _encoding:
            # Force defaults if not set
            script.set('SystemEncoding', DEFAULT_SYSTEM_ENCODING)

        if not (_clean or _move_path):
            script.set('Mode', TIDYIT_MODE_DEFAULT)

        if not _video_minsize:
            script.set('VideoMinSize', DEFAULT_VIDEO_MIN_SIZE_MB)

        if not _minage:
            script.set('ProcessMinAge', DEFAULT_MATCH_MINAGE)

        if not _safeentries:
            script.set('SafeEntries', DEFAULT_TIDYSAFE_ENTRIES)

        if not _alwaystrash:
            script.set('AlwaysTrash', '')

        if not _metacontent:
            script.set('MetaContent', '')

        if script.get('MovePath') is None:
            # Allow this flag to exist
            script.set('MovePath', '')

        # Force generic Video Extensions
        script.set('VideoExtensions', DEFAULT_VIDEO_EXTENSIONS)

        # Force our video extras if not otherwise specified
        if script.get('VideoExtras') is None:
            script.set('VideoExtras', '')

        # Finally set the directory the user specified for scanning
        script.set('VideoPaths', videopaths)

    if script.script_mode is SCRIPT_MODE.NONE \
            and not script.get('VideoPaths'):
        # Provide some CLI help when VideoPaths has been
        # detected as not being identified
        parser.print_help()
        exit(1)

    # call run() and exit() using its returned value
    exit(script.run())