├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dictionary.c ├── dictionary.h ├── docs ├── README.md └── _config.yml ├── dvr2plex.c ├── dvr2plex.h ├── keywords.hash └── patterns.hash /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | /dvr2plex 55 | .idea/ 56 | cmake-*/ 57 | keywords.h 58 | patterns.h 59 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | project(DVR2Plex LANGUAGES C)
4 |
5 | # Default to a Debug build, but only for single-config generators and only
6 | # when the user has not already chosen one (-DCMAKE_BUILD_TYPE=...).
7 | get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
8 | if(NOT is_multi_config AND NOT CMAKE_BUILD_TYPE)
9 |   set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type (defaults to Debug)" FORCE)
10 | endif()
11 |
12 | set(CMAKE_C_STANDARD 11)
13 |
14 | # Each <name>.hash file is turned into <name>.h by the external `hashstrings`
15 | # tool, which has to be on PATH when the build runs.
16 | # NOTE(review): the rule assumes hashstrings writes its output next to its
17 | # input file (i.e. into the source tree) - confirm against the tool.
18 | set(hash_inputs
19 |   "${CMAKE_CURRENT_SOURCE_DIR}/keywords.hash"
20 |   "${CMAKE_CURRENT_SOURCE_DIR}/patterns.hash"
21 | )
22 |
23 | set(generated_headers)
24 | foreach(hash_input IN LISTS hash_inputs)
25 |   # The dot is escaped so that only a literal ".hash" suffix is rewritten.
26 |   string(REGEX REPLACE "\\.hash$" ".h" generated_header "${hash_input}")
27 |
28 |   add_custom_command(
29 |     OUTPUT "${generated_header}"
30 |     COMMAND hashstrings "${hash_input}"
31 |     DEPENDS "${hash_input}"
32 |     COMMENT "Generating ${generated_header}"
33 |     VERBATIM
34 |   )
35 |
36 |   list(APPEND generated_headers "${generated_header}")
37 | endforeach()
38 |
39 | add_custom_target(hashes ALL DEPENDS ${generated_headers})
40 |
41 | add_executable(DVR2Plex dvr2plex.c dvr2plex.h dictionary.c dictionary.h)
42 | target_include_directories(DVR2Plex PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
43 | # Target-scoped warnings: user-supplied CMAKE_C_FLAGS are no longer overwritten.
44 | target_compile_options(DVR2Plex PRIVATE -Wall -Wextra)
45 | # Run the header generation before the executable is compiled; the generated
46 | # headers are presumably included by dvr2plex.c (not visible here).
47 | add_dependencies(DVR2Plex hashes)
48 | # CMAKE_DL_LIBS names the platform's dlopen library, replacing the hard-coded
49 | # /usr/lib/x86_64-linux-gnu/libdl.so path.
50 | target_link_libraries(DVR2Plex PRIVATE ${CMAKE_DL_LIBS})
51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Paul Chambers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/68d3bc77c19b400693c30f07f6fe0fdf)](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&utm_medium=referral&utm_content=paul-chambers/DVR2Plex&utm_campaign=Badge_Grade) 2 | 3 | [Full Documentation](https://paul-chambers.github.io/DVR2Plex/) 4 | 5 | # DVR2Plex 6 | 7 | This tool uses some fancy text processing techniques to reformat a filename 8 | into another one. To be useful, this tool needs to be used with other Linux 9 | command line tools, e.g. to copy (or hardlink) files to their new location. 10 | 11 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, I've included 12 | a simple utility called **mkln**. I'd recommend using it, at least initially, 13 | as it's *safe* - it won't replace an existing file in the destination 14 | (it will append a number inside braces to avoid the name collision), and 15 | automatically creates directories specified by the target path that don't 16 | yet exist. 17 | 18 | **Caution:** You must accept responsibility for your configuration and use 19 | of this tool, and accept that data loss is a possibility. Please be careful 20 | when using this tool. 21 | 22 | **Note:** this tool was written for a Linux environment. It *should* work 23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing 24 | there. 25 | 26 | ## Why does this exist? 27 | 28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to 29 | supply content for the Plex content library. Plex has a preferred way it 30 | likes to see the library organized, and things generally go more smoothly 31 | if everything uses the same organization and naming strategy.
32 | 33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/), 34 | which is well implemented and has some features that I find particularly 35 | useful. It keeps its recordings in a private directory, and while it is 36 | well-organized, it's in a way that's a little different to the structure 37 | that Plex prefers. More importantly, Channels DVR 'owns' the files in 38 | that folder, and other software should respect that, and not 'pull the rug' 39 | out from under Channels DVR by messing with those files behind its back. 40 | 41 | You could point Plex at the Channels directories holding the 42 | recordings, and Plex will figure things out. However, it should treat 43 | those directory contents as read-only, otherwise Plex will be altering 44 | files that Channels owns. 45 | 46 | I initially wrote a shell script that hardlinked the recordings Channels 47 | DVR made in its private directory into the 'right place' in my Plex 48 | library. A 'hard link' doesn't use any more disk space, and has the 49 | positive attribute that the hard link in the Plex Library can be moved 50 | and/or renamed without affecting the one in the Channels DVR 'private' 51 | directory. The inverse is also true - the Channels DVR can delete its 52 | file in the 'private' directory, without affecting the other link to it 53 | in the Plex library, so it will remain. This is very handy when used 54 | with the 'only keep *n* episodes' option in Channels DVR (or 55 | [kmttg](https://sourceforge.net/projects/kmttg) and 56 | [jellyfin](https://jellyfin.org/) for that matter). 57 | 58 | *Problem solved, right?* Well, mostly... 59 | 60 | The biggest issue with such a simple approach is that the world 61 | hasn't yet settled on how a series is named, and possibly never will. 62 | For example, for a series like "Marvel's Agents of S.H.I.E.L.D." there 63 | are a number of variations on that title that you'll see in the wild. 64 | Variations like "Marvel's Agents of S.H.I.E.L.D.
(2013)", or 65 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period), 66 | all the way to "marvels.agents.of.shield". 67 | 68 | If that isn't accounted for, then differently-named directories will 69 | accumulate, containing episodes of the same series, often duplicates. 70 | This only gets worse as the number of content sources increases. 71 | 72 | Not good. This is the itch this tool scratches. 73 | 74 | ## OK, but what does it *do?* 75 | 76 | In a nutshell, it's a specialized string manipulation tool. You feed it 77 | the name of a media file, it parses out the series, season, episode, 78 | episode title, etc. from the name, and provides a 'template' system 79 | that allows you to easily reassemble a new name for the destination 80 | from the parts it extracted from the source filename. 81 | 82 | *That sounds like something you could do with `sed` or `awk`. So why 83 | write this?* 84 | 85 | The pattern matching is done in a loose/fuzzy way that would be 86 | impractical to do in bash script or command-line string manipulation 87 | tools. 88 | 89 | It uses a fundamentally different technique - character-mapped hashing 90 | - than the usual simple character-by-character string comparison or 91 | regular expression methods. 92 | 93 | See "How does it work?" below, if you're curious about the details. 94 | 95 | The 'template' describes the form of the string that this tool should 96 | output. The component parts are substituted in the appropriate place 97 | where you put something like "{episode}". There are quite a number of 98 | these parameters: 99 | 100 | | Parameter Name | Description | 101 | |--- |--- | 102 | | {source} | The path to the source file, as passed to this tool. | 103 | | {path} | The 'dirname' part of the source (no trailing slash) | 104 | | {basename} | The 'basename' of the source (minus the extension) | 105 | | {extension} | The extension. 
Separated so that if what you want to do is convert containers, you can write something like {path}/{basename}.mkv as the destination in the template | 106 | | {series} | The raw name of the series (as extracted from the source) | 107 | | {season} | Always at least two digits, zero-padded | 108 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise "Season {season}" | 109 | | {episode} | Always at least two digits, zero-padded | 110 | | {title} | The episode title | 111 | | {destseries} | This is the target folder that the tool determined (by fuzzy match) is the right destination for the file.
More details below. | 112 | | {destination} | The destination directory for the file. Also scanned as part of the fuzzy matching | 113 | | {firstaired} | The date this episode first aired *(specific to Channels DVR files)* | 114 | | {daterecorded} | The date/time Channels DVR recorded this *(specific to Channels DVR files)* | 115 | | {template} | It's a parameter too (though you can't use it in a template, obviously) | 116 | 117 | This is only the predefined list of parameters that the parsing will 118 | pre-populate automatically - except for {destination} and {template}, 119 | which need to be defined by the user. They can either be defined on the 120 | command line, or in a config file - the tool looks for 121 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will 122 | process the config file defined by the `-c` command line option, before 123 | finishing with any command line options. Parameters can be defined 124 | multiple times; the last one wins. So you could, for example, define a 125 | default {title} as '(unknown)' in a config file, and it would be used 126 | if the file name parsing didn't find an episode title. 127 | 128 | DVR2Plex also looks for config files in the filesystem hierarchy above 129 | the source file. This is particularly useful for providing different 130 | templates and destinations for **TV** vs. **Movie** recordings, or 131 | handling a system running multiple DVR software packages (as I am). Obviously 132 | this check is made for each directory that DVR2Plex is asked to process 133 | files within, and so if found, these config files have 'the last say' in 134 | setting parameters, overriding everything else. 135 | 136 | You may also define your own parameters in the config file, and use them 137 | in the template. And if the output-building code can't find a parameter 138 | name that matches in its dictionaries, it will also look for an 139 | environment variable with that name (case-sensitive, in this case).
So 140 | {HOME} will be replaced by the path to the user's home directory (i.e. 141 | the equivalent of '~' in the shell). 142 | 143 | The assumption is that at least one of the config files would contain 144 | at least the {destination} and {template} parameters, since those are 145 | likely to be consistent on a given machine. 146 | 147 | For example, `/etc/DVR2Plex.conf` might contain: 148 | ``` 149 | destination = /home/video/TV 150 | template = mkln "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title? @}{extension}" 151 | ``` 152 | So assuming that the source file was 153 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg` 154 | and a directory existed called `/home/video/TV/Person of Interest (2011)` 155 | then that template would output (or execute): 156 | 157 | `mkln "/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"` 158 | 159 | but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg` 160 | would also create the same destination: 161 | `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"` 162 | 163 | **Caution:** DVR2Plex will blindly execute whatever you tell it to execute. It 164 | is just manipulating strings, after all. It has no notion of the quality of 165 | the source file vs. an existing destination file, and will happily overwrite 166 | a high quality file with a lower quality one if that's what you tell it to do. 167 | 168 | Be aware of this when creating a template you expect DVR2Plex to execute directly. 169 | 170 | ### Conditional Expansions 171 | *But wait, what on earth does {episode?E@:-} mean?* 172 | 173 | When a parameter isn't defined, it expands to nothing.
Which is all well 174 | and good, except if there are some surrounding characters that need to 175 | disappear too. {episode?E@:-} means: if {episode} is defined, output 'E' 176 | followed by the contents of {episode}, otherwise output just '-'. 177 | Similarly {seasonfolder} would normally be seen in a template written 178 | as {seasonfolder?@/} so that the path separator is only included if 179 | {seasonfolder} is defined. 180 | 181 | This is akin to the ternary operator in C, if you're a programmer - 182 | up to the '?' is the thing to test, after the '?' and before the ':' 183 | or '}' is the string to output if it is defined ('true'), between the 184 | ':' and '}' is the string to output if it isn't defined ('false'). 185 | Where an '@' appears, insert the value of the parameter. 186 | 187 | ## How does it work? 188 | 189 | The tool uses modified hashing to do comparisons. The hashing is 190 | modified by mapping each character through a mapping table first, so 191 | that particular characters can be mapped to another, or ignored 192 | completely. For example, upper case characters are mapped to lower case, 193 | so "UPPER" has the same hash as "upper" and "UpPeR". 194 | 195 | DVR2Plex first builds up a list of hashes for the directories 196 | found in the {destination} directory. 197 | 198 | The matching algorithm isn't confused by differing case, missing 199 | apostrophes, presence or absence of a year or country (e.g. 200 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in 201 | the destination). 202 | 203 | This is particularly useful for the worst offenders. For example, if you 204 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)", 205 | the fuzzy matching can deal with something in the source like 206 | "marvels.agents.of.shield" and still put it in the correct folder. 207 | 208 | This fixes all-lowercase series for example, or random "Of"/"of" confusion 209 | (or "MythBusters" vs.
"Mythbusters") 210 | 211 | Some characters are often dropped, like apostrophes or the trailing 212 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's" 213 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T." 214 | 215 | All digits are mapped to '0' (though only for the pattern matching), so 216 | we have a constant hash for patterns like S02E10, S01E05, etc. This 217 | allows us to easily find the several patterns we're looking for, very 218 | efficiently. Those patterns are mostly season/episode patterns: SnnEnn, 219 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify 220 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR 221 | recordings, along with nnnn-nn-nn-nnnn for the date recorded. 222 | 223 | There are in fact two character maps used for hashing. One for finding 224 | patterns, and another for looking up parameters. The main difference is 225 | that the parameter mapping doesn't map all digits to zero. 226 | 227 | Internally, there are three 'dictionaries' used for searching, each is 228 | a simple key-value list, using the hash as the key. The 'main' 229 | dictionary contains all the parameters that will remain the same for 230 | the entire run. This is the dictionary populated from the config files 231 | and command line options. There is a 'file' dictionary, which contains 232 | the parsed value for the last source parsed. This is discarded and 233 | rebuilt for each source, so per-file values don't carry over from one 234 | source to the next. 235 | 236 | The third dictionary is the 'series' one, which is populated with hashes 237 | of the directory names that it finds by doing a scan of the {destination} 238 | directory. The assumption is that these are essentially the canonical 239 | names/destinations for the known TV series. 
For a given series directory, 240 | there may be more than one dictionary entry, as the hash without the 241 | year or country is included, as well as with it included. So the hash 242 | for "Person of Interest" is stored as well as "Person of Interest (2011)", 243 | both pointing to the target "Person of Interest (2011)" directory. 244 | Thus either form will match, and point to the right destination directory. 245 | This is the mechanism behind the {destfolder} parameter. 246 | -------------------------------------------------------------------------------- /dictionary.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by root on 8/22/19. 3 | // 4 | #include "dvr2plex.h" 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dictionary.h" 10 | 11 | tDictionary * createDictionary( const char * name ) 12 | { 13 | tDictionary * result = (tDictionary *)calloc( 1, sizeof(tDictionary) ); 14 | result->name = name; 15 | return result; 16 | } 17 | 18 | void emptyDictionary( tDictionary * dictionary ) 19 | { 20 | tParam * p = dictionary->head; 21 | dictionary->head = NULL; 22 | 23 | while ( p != NULL) 24 | { 25 | if ( p->value != NULL ) 26 | { 27 | free( (void *) p->value ); 28 | } 29 | 30 | tParam * next = p->next; 31 | free( p ); 32 | p = next; 33 | } 34 | } 35 | 36 | void destroyDictionary( tDictionary * dictionary ) 37 | { 38 | emptyDictionary( dictionary ); 39 | free( dictionary ); 40 | } 41 | 42 | void printDictionary( tDictionary * dictionary ) 43 | { 44 | if ( dictionary != NULL ) 45 | { 46 | debugf( 3, "...%s dictionary...\n", dictionary->name); 47 | 48 | tParam * p = dictionary->head; 49 | while ( p != NULL ) 50 | { 51 | debugf( 3, "%16s: \"%s\"\n", lookupHash(p->hash), p->value ); 52 | p = p->next; 53 | } 54 | } 55 | } 56 | 57 | int addParam( tDictionary * dictionary, tHash hash, const char * value ) 58 | { 59 | int result = -1; 60 | 61 | tParam * p = malloc( sizeof(tParam) ); 62 | 63 | if (p != NULL) 64 | 
{ 65 | p->hash = hash; 66 | p->value = strdup( value ); 67 | 68 | p->next = dictionary->head; 69 | dictionary->head = p; 70 | 71 | result = 0; 72 | } 73 | return result; 74 | } 75 | 76 | string findValue( tDictionary * dictionary, tHash hash ) 77 | { 78 | string result = NULL; 79 | tParam * p = dictionary->head; 80 | 81 | while (p != NULL) 82 | { 83 | if ( p->hash == hash ) 84 | { 85 | result = p->value; 86 | break; 87 | } 88 | p = p->next; 89 | } 90 | return result; 91 | } 92 | -------------------------------------------------------------------------------- /dictionary.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by root on 8/22/19. 3 | // 4 | 5 | #ifndef DVR2PLEX_DICTIONARY_H 6 | #define DVR2PLEX_DICTIONARY_H 7 | 8 | typedef unsigned long tHash; 9 | 10 | typedef struct tParam { 11 | struct tParam * next; 12 | const char * value; 13 | tHash hash; 14 | } tParam; 15 | 16 | typedef struct { 17 | tParam * head; 18 | const char * name; 19 | } tDictionary; 20 | 21 | tDictionary * createDictionary( const char * name ); 22 | void emptyDictionary( tDictionary * dictionary ); 23 | void destroyDictionary( tDictionary * dictionary ); 24 | string lookupHash( tHash ); 25 | void printDictionary( tDictionary * dictionary); 26 | int addParam( tDictionary * dictionary, tHash hash, string value ); 27 | string findValue( tDictionary * dictionary, tHash hash ); 28 | 29 | #endif // DVR2PLEX_DICTIONARY_H 30 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/68d3bc77c19b400693c30f07f6fe0fdf)](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&utm_medium=referral&utm_content=paul-chambers/DVR2Plex&utm_campaign=Badge_Grade) 2 | 3 | # DVR2Plex 4 | 5 | **Caution:** If you are new to the linux command line, and/or are 
unfamiliar 6 | with common linux tools like 'find', I wouldn't recommend this as a good first 7 | project because of the danger of overwriting existing files. 8 | 9 | This tool uses some fancy text processing techniques to reformat a filename 10 | into another one. To be useful, this tool needs to be used with other Linux 11 | command line tools, e.g. to copy (or hardlink) files to their new location. 12 | 13 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, it 14 | cannot prevent the tool doing the copy from blindly overwriting an existing 15 | high-quality file with a lower quality one. Thus it's best to have your 16 | template generate a destination filename that won't overwrite any existing 17 | files. 18 | 19 | You must accept responsibility for your configuration and use of this tool, 20 | and accept that data loss is a possibility. Be careful when using this tool. 21 | 22 | **Note:** this tool was written for a Linux environment. It *should* work 23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing 24 | there. 25 | 26 | ## Why does this exist? 27 | 28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to 29 | supply content for the Plex content library. Plex has a preferred way it 30 | likes to see the library organized, and things generally go more smoothly 31 | if everything uses the same organization and naming strategy. 32 | 33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/), 34 | which is particularly well implemented and has some features that I find 35 | particularly useful. It keeps its recordings in a private directory, and 36 | while it is well-organized, it's in a way that's a little different to 37 | the structure that Plex prefers. Most importantly, Channels DVR 'owns' 38 | the files in that folder, and other software must respect that, and not 39 | 'pull the rug out' from under Channels by messing with those files.
40 | 41 | You could point Plex at the Channels directories holding the 42 | recordings, and Plex will figure things out. But it should treat those 43 | directory contents as read-only, otherwise Plex will be altering files 44 | that Channels owns. 45 | 46 | *So,* I initially wrote a shell script that hardlinked the recordings 47 | Channels DVR made in its private directory into the 'right place' in 48 | my Plex library. A hard link doesn't use any more disk space, but does 49 | mean the hard link in the Plex Library can be moved and/or renamed 50 | without affecting the one in the Channels DVR 'private' directory. 51 | The inverse is also true - the Channels DVR can delete its file in 52 | the 'private' directory, but the other link to it in the Plex library 53 | will remain. Which is very handy if you want to tell Channels DVR to 54 | 'only keep *n* episodes' (or [kmttg](https://sourceforge.net/projects/kmttg), 55 | for that matter). 56 | 57 | *Problem solved, right?* Well, mostly... 58 | 59 | The biggest issue with such a simple approach is that the world 60 | hasn't yet settled on how a series is named, and possibly never will. 61 | For example, for a series like "Marvel's Agents of S.H.I.E.L.D." there 62 | are a number of variations on that title that you'll see in the wild. 63 | Variations like "Marvel's Agents of S.H.I.E.L.D. (2013)", or 64 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period), 65 | all the way to "marvels.agents.of.shield". 66 | 67 | If that isn't accounted for, then differently-named directories will 68 | accumulate, containing episodes of the same series, often duplicates. 69 | This only gets worse as the number of content sources increases. 70 | 71 | Not good. This is the itch this tool scratches. 72 | 73 | ## OK, but what does it *do?* 74 | 75 | In a nutshell, it's a specialized string manipulation tool. You feed it 76 | the name of a media file, it parses out the series, season, episode, 77 | episode title, etc.
from the name, and provides a 'template' system 78 | that allows you to easily reassemble a new name for the destination 79 | from the parts it extracted from the source filename. 80 | 81 | *That sounds like something you could do with `sed` or `awk`. So why 82 | write this?* 83 | 84 | The pattern matching is done in a loose/fuzzy way that would be 85 | impractical to do in bash script or command-line string manipulation 86 | tools. 87 | 88 | It uses a fundamentally different technique - character-mapped hashing 89 | - than the usual simple character-by-character string comparison or 90 | regular expression methods. 91 | 92 | See "How does it work?" below, if you're curious about the details. 93 | 94 | The 'template' describes the form of the string that this tool should 95 | output. The component parts are substituted in the appropriate place 96 | where you put something like "{episode}". There are quite a number of 97 | these parameters: 98 | 99 | | parameter name | description | 100 | |--- |--- | 101 | | {source} | The path to the source file, as passed to this tool. | 102 | | {path} | The 'dirname' part of the source (no trailing slash) | 103 | | {basename} | The 'basename' of the source (without the extension) | 104 | | {extension} | The extension. separate so that if what you want to do is convert containers, you can use something like {path}/{basename}.mkv | 105 | | {series} | The raw name of the series (as extracted from the source | 106 | | {season} | Always at least two digits, zero-padded | 107 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise equivalent to "Season {season}" 108 | | {episode} | Always at least two digits, zero-padded | 109 | | {title} | The episode title | 110 | | {destseries} | This is the target folder that the tool determined, by a fuzzy match, is the right destination for the file.
More details below. | 111 | | {destination} | The destination directory for the file. Also used as part of the fuzzy matching | 112 | | {firstaired} | The date this episode first aired *(specific to Channels DVR files)* | 113 | | {daterecorded} | The date/time Channels DVR recorded this *(specific to Channels DVR files)* | 114 | | {template} | It's a parameter too, though you can't use it in a template | 115 | 116 | This is only the predefined list of parameters that the parsing will 117 | pre-populate automatically - except for {destination} and {template}, 118 | which need to be defined by the user. They can either be defined on the 119 | command line, or in a config file - the tool looks for 120 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will 121 | process the config file defined by the `-c` command line option, before 122 | finishing with any command line options. Parameters can be defined 123 | multiple times; the last one wins. So you could, for example, define a 124 | default {title} as '(unknown)' in a config file, and it would be used 125 | if the file name parsing didn't find an episode title. 126 | 127 | You may also define your own parameters in the config file, and use them 128 | in the template. And if the output-building code can't find a parameter 129 | name that matches in its dictionaries, it will also look for an 130 | environment variable with that name (case-sensitive, in this case). So 131 | {HOME} will be replaced by the path to the user's home directory (i.e. 132 | the equivalent of '~'). 133 | 134 | The assumption is that a config file would contain at least the 135 | {destination} and {template} parameters, since those are likely to be 136 | consistent on a given machine. For example, `/etc/DVR2Plex.conf` 137 | might contain: 138 | ``` 139 | destination = /home/video/TV 140 | template = "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title?
@}{extension}" 141 | ``` 142 | So assuming that the source file was 143 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg` 144 | and a directory existed called `/home/video/TV/Person of Interest (2011)` 145 | then that template would output: 146 | 147 | `"/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"` 148 | 149 | but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg` 150 | would also create the same destination of 151 | `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"` 152 | 153 | **Caution:** It's a good practice to include something in the template 154 | that is guaranteed to make the generated name unique, so that it won't 155 | overwrite an existing file in the destination (potentially a lower 156 | quality version). Since Channels DVR recordings have an .mpg extension, 157 | you'll probably be OK, but better safe than sorry. 158 | 159 | ### Conditional Expansions 160 | *But wait, what on earth does {episode?E@:-} mean?* 161 | 162 | When a parameter isn't defined, it expands to nothing. Which is all well 163 | and good, except if there are some surrounding characters that need to 164 | disappear too. {episode?E@:-} means "if {episode} is defined, output 'E' 165 | followed by the contents of {episode}, otherwise output just '-'". 166 | Similarly {seasonfolder} would normally be seen in a template written 167 | as {seasonfolder?@/} so that the path separator is only included if 168 | {seasonfolder} is defined. 169 | 170 | This is akin to the ternary operator in C, if you're a programmer - 171 | up to the '?' is the thing to test, after the '?'
and before the ':' 172 | or '}' is the string to output if it is defined ('true'), between the 173 | ':' and '}' is the string to output if it isn't defined ('false'). 174 | Where an '@' appears, insert the value of the parameter. 175 | 176 | ## How does it work? 177 | 178 | The tool uses modified hashing to do comparisons. The hashing is 179 | modified by mapping each character through a mapping table first, so 180 | that particular characters can be mapped to another, or ignored 181 | completely. For example, upper case characters are mapped to lower case, 182 | so "UPPER" has the same hash as "upper" or "UpPeR". 183 | 184 | DVR2Plex first builds up a list of hashes for the directories 185 | found in the {destination} directory. 186 | 187 | The matching algorithm is not fazed by differing case, missing 188 | apostrophes, presence or absence of a year or country (e.g. 189 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in 190 | the destination). 191 | 192 | This is particularly useful for the worst offenders. For example, if you 193 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)", 194 | the fuzzy matching can deal with something in the source like 195 | "marvels.agents.of.shield" and still put it in the correct folder. 196 | 197 | This fixes all-lowercase series for example, or random "Of"/"of" confusion 198 | (or "MythBusters" vs. "Mythbusters"). 199 | 200 | Some characters are often dropped, like apostrophes or the trailing 201 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's" 202 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T." 203 | 204 | All digits are mapped to '0' (though only for the pattern matching), so 205 | we have a constant hash for patterns like S02E10, S01E05, etc. This 206 | allows us to easily find the several patterns we're looking for, very 207 | efficiently.
Those patterns are mostly season/episode patterns: SnnEnn, 208 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify 209 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR 210 | recordings, along with nnnn-nn-nn-nnnn for the date recorded. 211 | 212 | There are in fact two character maps used for hashing: one for finding 213 | patterns, and another for looking up parameters. The main difference is 214 | that the parameter mapping doesn't map all digits to zero. 215 | 216 | Internally, there are three 'dictionaries' used for searching; each is 217 | a simple key-value list, using the hash as the key. The 'main' 218 | dictionary contains all the parameters that will remain the same for 219 | the entire run. This is the dictionary populated from the config files 220 | and command line options. There is a 'file' dictionary, which contains 221 | the parsed values for the last source parsed. This is discarded and 222 | rebuilt for each source, so per-file values don't carry over from one 223 | source to the next. 224 | 225 | The third dictionary is the 'series' one, which is populated with hashes 226 | of the directory names that it finds by doing a scan of the {destination} 227 | directory. The assumption is that these are essentially the canonical 228 | names/destinations for the known TV series. For a given series directory, 229 | there may be more than one dictionary entry, as the hash without the 230 | year or country is included, as well as with it included. So the hash 231 | for "Person of Interest" is stored as well as "Person of Interest (2011)", 232 | both pointing to the target "Person of Interest (2011)" directory. 233 | Thus either form will match, and point to the right destination directory. 234 | This is the mechanism behind the {destseries} parameter.
235 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect -------------------------------------------------------------------------------- /dvr2plex.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright © Paul Chambers, 2019. 3 | 4 | @ToDo Switch to UTF-8 string handling, rather than relying on ASCII backwards-compatibility 5 | */ 6 | 7 | #define _XOPEN_SOURCE 700 8 | #include 9 | 10 | #include "dvr2plex.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include // for basename() 20 | #include 21 | #define __USE_MISC // dirent.d_type is linux-specific, apparently 22 | #include 23 | #define __USE_GNU 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #include "dictionary.h" 30 | 31 | 32 | /* hashes for patterns we are scanning for in the filename 33 | this hash table is used to generate hashes used to match patterns. 34 | it maps all digits to the same value, maps uppercase letters to 35 | lowercase, and ignores several characters completely. 36 | */ 37 | #include "patterns.h" 38 | 39 | /* 40 | Hashes for the keywords/parameter names in the template. This hash table is also 41 | used for series names. 42 | 43 | Periods are ignored, because the trailing one is often omitted of series like 44 | "S.W.A.T." and "Marvel's Agents of S.H.I.E.L.D.". By ignoring periods, 45 | "S.W.A.T.", "S.W.A.T" and "SWAT" will all result in the same hash value. 46 | 47 | Since periods may also be used as a separator, we have to treat ' ' and '_' as 48 | equivalent, or the hash for a space-separated name won't match the hash of a 49 | period- or underscore-seperated one. 50 | 51 | In other words, ' ', '_' and '.' do not contribute to the series hash. 
/**
 * Trim any trailing whitespace from the end of a string, in place.
 *
 * The string is cut immediately after its last non-whitespace character.
 * A string that is empty or entirely whitespace becomes the empty string.
 * A NULL pointer is accepted and ignored.
 *
 * @param line  NUL-terminated, writable string to be trimmed (may be NULL)
 */
void trimTrailingWhitespace(char * line)
{
    if ( line == NULL )
    {
        return;
    }

    /* one past the last non-whitespace character seen so far */
    char * end = line;

    for ( char * p = line; *p != '\0'; ++p )
    {
        /* isspace() is only defined for EOF and values representable as an
           unsigned char; a plain (signed) char holding a byte >= 0x80, such
           as any UTF-8 continuation byte, must be converted first */
        if ( !isspace( (unsigned char)*p ) )
        {
            end = p + 1;
        }
    }

    *end = '\0';
}
patternMap->key ) 131 | { 132 | return patternMap->label; 133 | } 134 | patternMap++; 135 | } 136 | 137 | return ""; 138 | } 139 | 140 | /** 141 | * @brief look in the three dictionaries for the first occurance of a hash value 142 | * @param hash 143 | * @return 144 | */ 145 | string findParam( tHash hash ) 146 | { 147 | string result; 148 | 149 | result = findValue( gFileDict, hash ); 150 | if ( result == NULL ) 151 | { 152 | result = findValue( gPathDict, hash ); 153 | } 154 | if ( result == NULL ) 155 | { 156 | result = findValue( gMainDict, hash ); 157 | } 158 | return result; 159 | } 160 | 161 | /** 162 | Hashes the 'series' using the 'keyword' hash table, since comparing series names needs 163 | slightly different logic than scanning for patterns. Separators (spaces, periods, 164 | underscores) are ignored completely. As are \', !, amd ?, since those are frequently 165 | omitted. Upper case letters are mapped to lower case since those are also very 166 | inconsistent (no UTF-8 handling yet, though). and '&' is expanded to 'and' in the 167 | hash, so both forms will hash to the same value. 168 | 169 | Since a series name may or may not be suffixed by a year or country surrounded 170 | by brackets (e.g. (2019) or (US)). So a hash is added whenever a left bracket 171 | is encountered, so the hash for 'Some Series' and 'Some Series (2019)' are both 172 | stored in the series dictionary, so there will be a hash available to match 173 | either with or without the suffix. 174 | */ 175 | void addSeries( string series ) 176 | { 177 | tHash result = 0; 178 | unsigned char * s = (unsigned char *)series; 179 | unsigned char c; 180 | 181 | do { 182 | c = kKeywordMap[ *s++ ]; 183 | switch ( c ) 184 | { 185 | // we hash the '&' character as if 'and' was used. so both forms generate the same hash 186 | // e.g. 
the hash of 'Will & Grace' will match the hash of 'Will and Grace' 187 | case '&': 188 | result = fKeywordHashChar( result, 'a' ); 189 | result = fKeywordHashChar( result, 'n' ); 190 | result = fKeywordHashChar( result, 'd' ); 191 | break; 192 | 193 | case kKeywordLBracket: 194 | // we found something bracketed, e.g. (uk) or (2019), so we also add the 195 | // intermediate hash to the dictionary, before we hash the bracketed content. 196 | // Then if we hash the same series with the year omitted, for example, will 197 | // still match something. Though we can't do much about a file that omits a 198 | // a year or country, e.g. 'MacGyver' instead of 'MacGyver (2016)', or 199 | // 'Hell's Kitchen' instead of 'Hell's Kitchen (US)' 200 | // 201 | // Note: if there are multiple left brackets encountered, there will be 202 | // multiple intermediate hashes added. 203 | 204 | addParam( gSeriesDict, result, series ); 205 | result = fKeywordHashChar( result, c ); 206 | break; 207 | 208 | case '\0': 209 | case kKeywordSeparator: 210 | case kKeywordIgnored: 211 | break; 212 | 213 | default: 214 | result = fKeywordHashChar( result, c ); 215 | break; 216 | } 217 | } while ( c != '\0' ); 218 | 219 | // also add the hash of the full string, including any trailing bracketed stuff 220 | addParam( gSeriesDict, result, series ); 221 | } 222 | 223 | static int scanDirFilter( const struct dirent * entry) 224 | { 225 | int result = 0; 226 | 227 | result = ( entry->d_name[0] != '.' 
&& entry->d_type == DT_DIR ); 228 | 229 | // debugf( 3, "%s, 0x%x, %d\n", entry->d_name, entry->d_type, result ); 230 | return result; 231 | } 232 | 233 | int buildSeriesDictionary( string path ) 234 | { 235 | struct dirent **namelist; 236 | int n; 237 | 238 | n = scandir( path, &namelist, scanDirFilter, alphasort); 239 | if ( n < 0 ) { 240 | perror("scandir"); 241 | return n; 242 | } 243 | 244 | for ( int i = 0; i < n; ++i ) 245 | { 246 | addSeries( namelist[ i ]->d_name ); 247 | free( namelist[ i ] ); 248 | } 249 | free(namelist); 250 | 251 | /* printDictionary( dictionary ); */ 252 | 253 | return 0; 254 | } 255 | 256 | void addSeasonEpisode( unsigned int season, unsigned int episode ) 257 | { 258 | char temp[50]; 259 | 260 | snprintf( temp, sizeof(temp), "%02u", season ); 261 | addParam( gFileDict, kKeywordSeason, temp ); 262 | if ( season == 0 || episode == 0 ) 263 | { 264 | addParam( gFileDict, kKeywordSeasonFolder, "Specials" ); 265 | } 266 | else 267 | { 268 | snprintf( temp, sizeof(temp), "Season %02u", season ); 269 | addParam( gFileDict, kKeywordSeasonFolder, temp ); 270 | } 271 | 272 | snprintf( temp, sizeof(temp), "%02u", episode ); 273 | addParam( gFileDict, kKeywordEpisode, temp ); 274 | } 275 | 276 | void storeSeries( string series ) 277 | { 278 | string result = series; 279 | string ptr, end; 280 | tHash hash; 281 | unsigned char c; 282 | 283 | ptr = series; 284 | hash = 0; 285 | 286 | addParam( gFileDict, kKeywordSeries, series ); 287 | 288 | // regenerate the hash incrementally, checking at each separator. 289 | // remember the longest match, i.e. 
keep looking until the end of the string 290 | do { 291 | c = kKeywordMap[ (unsigned char)*ptr ]; 292 | switch ( c ) 293 | { 294 | case kKeywordSeparator: 295 | case '\0': 296 | /* let's see if we have a match */ 297 | debugf( 4, "checking: 0x%016lx\n", hash ); 298 | 299 | string match = findValue( gSeriesDict, hash ); 300 | if ( match != NULL) 301 | { 302 | result = match; 303 | debugf( 3, "matched %s\n", result ); 304 | end = ptr; 305 | } 306 | break; 307 | 308 | case '&': 309 | hash = fPatternHashChar( hash, 'a' ); 310 | hash = fPatternHashChar( hash, 'n' ); 311 | hash = fPatternHashChar( hash, 'd' ); 312 | break; 313 | 314 | default: 315 | hash = fPatternHashChar( hash, c ); 316 | break; 317 | }; 318 | ptr++; 319 | } while ( c != '\0' ); 320 | 321 | if ( result != series ) 322 | { 323 | if ( *end != '\0' ) 324 | { 325 | /* if the run is longer than the match with the series name, 326 | then store the trailing remnant as the episode title */ 327 | addParam( gFileDict, kKeywordTitle, (string) end + 1 ); 328 | *(char *) end = '\0'; 329 | } 330 | } 331 | addParam( gFileDict, kKeywordDestSeries, result ); 332 | } 333 | 334 | int storeToken( tHash hash, string value ) 335 | { 336 | unsigned int season = 0; 337 | unsigned int episode = 0; 338 | unsigned int year = 0; 339 | char temp[20]; 340 | string seriesName; 341 | 342 | switch (hash) 343 | { 344 | case kPatternSnnEnn: // we found 'SnnEnn' or 345 | case kPatternSyyyyEnn: // SyyyyEnnn 346 | case kPatternSnnEn: // SnnEn 347 | case kPatternSnEnn: // SnEnn 348 | case kPatternSnEn: // SnEn 349 | debugf( 3,"SnnEnn: %s\n", value); 350 | sscanf( value, "%*1c%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case 351 | addSeasonEpisode( season, episode ); 352 | break; 353 | 354 | case kPatternEnnn: 355 | debugf( 3,"Ennn: %s\n", value); 356 | sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case 357 | season = episode / 100; 358 | episode %= 100; 359 | 
addSeasonEpisode( season, episode ); 360 | break; 361 | 362 | case kPatternEnnnn: 363 | debugf( 3,"Ennnn: %s\n", value); 364 | sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case 365 | unsigned int divisor = 100; 366 | /* see if there's a season number to extract */ 367 | if ( ((episode / divisor) % 10) == 0 ) 368 | { 369 | /* least significant digit of season is zero, so we can increase the divisor by 10 */ 370 | divisor *= 10; 371 | } 372 | season = episode / divisor; 373 | episode %= divisor; 374 | addSeasonEpisode( season, episode ); 375 | break; 376 | 377 | case kPatternnXnn: 378 | case kPatternnnXnn: 379 | debugf( 3, "nnXnn: %s\n", value); 380 | sscanf( value, "%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case 381 | addSeasonEpisode( season, episode ); 382 | break; 383 | 384 | case kPatternYear: 385 | sscanf( value, "%*1c%u%*1c", &year ); // ignore characters since we don't know their case 386 | if ( 1890 < year && year <= gNextYear ) 387 | { 388 | snprintf( temp, sizeof( temp ), "%u", year ); 389 | addParam( gFileDict, kKeywordYear, temp ); 390 | } 391 | debugf( 3, "year: %u\n", year ); 392 | break; 393 | 394 | case kPatternCountryUSA: 395 | addParam( gFileDict, kKeywordCountry, "USA" ); 396 | break; 397 | 398 | case kPatternCountryUS: 399 | addParam( gFileDict, kKeywordCountry, "US" ); 400 | break; 401 | 402 | case kPatternCountryUK: 403 | addParam( gFileDict, kKeywordCountry, "UK" ); 404 | break; 405 | 406 | case kPatternNoMatch: 407 | seriesName = findParam( kKeywordSeries ); 408 | if ( seriesName == NULL ) 409 | { 410 | debugf( 3, "series: %s\n", value ); 411 | storeSeries( value ); 412 | } 413 | else 414 | { 415 | debugf( 3, "title: %s\n", value ); 416 | addParam( gFileDict, kKeywordTitle, value ); 417 | } 418 | break; 419 | 420 | // kPatternTwoDigits: 421 | // kPatternFourDigits: 422 | // kPatternSixDigits: 423 | // kPatternEightDigits: 424 | default: 425 | break; 426 | } 427 | 
return 0; 428 | } 429 | 430 | tHash checkHash( tHash hash) 431 | { 432 | switch (hash) 433 | { 434 | case kPatternSnnEnn: // SnnEnn 435 | case kPatternSyyyyEnn: // SnnnnEnn 436 | case kPatternSnnEn: // SnnEn 437 | case kPatternSnEnn: // SnEnn 438 | case kPatternSnEn: // SnEn 439 | case kPatternEnnn: // Ennn 440 | case kPatternEnnnn: // Ennnn 441 | case kPatternnXnn: // nXnn 442 | case kPatternnnXnn: // nnXnn 443 | case kPatternTwoDigits: // nn 444 | case kPatternFourDigits: // nnnn 445 | case kPatternSixDigits: // nnnnnn 446 | case kPatternEightDigits: // nnnnnnnn 447 | case kPatternCountryUSA: // (USA) 448 | case kPatternCountryUS: // (US) 449 | case kPatternCountryUK: // (UK) 450 | case kPatternYear: // (nnnn) 451 | break; 452 | 453 | default: 454 | hash = kPatternNoMatch; 455 | break; 456 | } 457 | return hash; 458 | } 459 | 460 | void tokenizeName( string originalName ) 461 | { 462 | gTokenList.next = NULL; 463 | 464 | string name = strdup( originalName ); // copy it, because we'll terminate strings in place as we go 465 | 466 | if ( name != NULL) 467 | { 468 | unsigned char c; 469 | 470 | string start = name; 471 | string ptr = start; 472 | tHash hash = 0; 473 | 474 | tToken * token = &gTokenList; 475 | 476 | do { 477 | c = kPatternMap[ *(unsigned char *)ptr ]; 478 | switch ( c ) 479 | { 480 | case kPatternSeperator: 481 | case '\0': 482 | // reached the end of a token 483 | token->next = calloc( 1, sizeof(tToken) ); 484 | token = token->next; 485 | if ( token != NULL ) 486 | { 487 | token->hash = checkHash( hash ); 488 | token->start = start; 489 | token->end = ptr; 490 | token->seperator = *ptr; 491 | *(char *)ptr = '\0'; 492 | } 493 | // only prepare for the next run if we're not at the end of the string 494 | if ( c != '\0' ) 495 | { 496 | // skip over a run of kPatternSeperator, if present (e.g. 
' - ') 497 | do { ptr++; } while ( kPatternMap[ *(unsigned char *)ptr ] == kPatternSeperator ); 498 | start = ptr; 499 | hash = 0; 500 | } 501 | break; 502 | 503 | case '&': 504 | hash = fPatternHashChar( hash, 'a' ); 505 | hash = fPatternHashChar( hash, 'n' ); 506 | hash = fPatternHashChar( hash, 'd' ); 507 | ptr++; 508 | break; 509 | 510 | default: 511 | hash = fPatternHashChar( hash, c ); 512 | ptr++; 513 | break; 514 | }; 515 | } while ( c != '\0' ); 516 | 517 | token = gTokenList.next; 518 | while ( token != NULL ) 519 | { 520 | debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator ); 521 | token = token->next; 522 | } 523 | } 524 | } 525 | 526 | void freeTokenList( void ) 527 | { 528 | tToken *nextToken; 529 | tToken * token = gTokenList.next; 530 | gTokenList.next = NULL; 531 | while ( token != NULL ) 532 | { 533 | nextToken = token->next; 534 | free( token ); 535 | token = nextToken; 536 | } 537 | } 538 | 539 | /* 540 | * Channels DVR: 541 | * air date: yyyy-mm-dd 542 | * recorded: yyyy-mm-dd-hhss 543 | * TVMosaic 544 | * recorded: hhss-yyyymmdd 545 | * 546 | */ 547 | void mergeDigits( void ) 548 | { 549 | tToken * token[4]; 550 | 551 | token[0] = gTokenList.next; 552 | 553 | while ( token[0] != NULL) 554 | { 555 | token[1] = token[0]->next; 556 | switch ( token[0]->hash ) 557 | { 558 | // Channels DVR: YYYY-mm-dd 559 | // YYYY-mm-dd-hhss 560 | // TVMosaic: HHSS-yyyymmdd 561 | case kPatternFourDigits: 562 | if ( token[1] != NULL) 563 | { 564 | token[2] = token[1]->next; 565 | 566 | switch ( token[1]->hash ) 567 | { 568 | // Channels DVR: YYYY-MM-dd 569 | // YYYY-MM-dd-hhss 570 | case kPatternTwoDigits: 571 | if ( token[1]->seperator == '-' && token[2] != NULL) 572 | { 573 | switch ( token[2]->hash ) 574 | { 575 | // Channels DVR: YYYY-MM-DD 576 | // YYYY-MM-DD-hhss 577 | case kPatternTwoDigits: 578 | token[3] = token[2]->next; 579 | if (token[3] != NULL) 580 | { 581 | switch ( token[3]->hash ) 582 | { 583 | 
case kPatternFourDigits: 584 | // ok, looks like we have YYYY-MM-DD-HHSS 585 | token[0]->next = token[3]->next; 586 | *(char *) token[0]->end = '-'; 587 | *(char *) token[1]->end = '-'; 588 | *(char *) token[2]->end = '-'; 589 | token[0]->end = token[3]->end; 590 | token[0]->hash = kKeywordDateRecorded; 591 | free( token[1] ); 592 | free( token[2] ); 593 | free( token[3] ); 594 | break; 595 | 596 | default: 597 | // ok, looks like we have YYYY-MM-DD 598 | token[0]->next = token[2]->next; 599 | *(char *) token[0]->end = '-'; 600 | *(char *) token[1]->end = '-'; 601 | token[0]->end = token[2]->end; 602 | token[0]->hash = kKeywordFirstAired; 603 | free( token[1] ); 604 | free( token[2] ); 605 | break; 606 | } 607 | } 608 | break; 609 | 610 | default: 611 | break; 612 | } 613 | } 614 | break; 615 | 616 | // TVMosaic: HHSS-YYYYMMDD 617 | case kPatternEightDigits: 618 | token[0]->next = token[1]->next; 619 | *(char *) token[0]->end = '-'; 620 | token[0]->end = token[1]->end; 621 | token[0]->hash = kKeywordDateRecorded; 622 | free( token[1] ); 623 | break; 624 | 625 | default: 626 | token[0]->hash = kPatternNoMatch; 627 | break; 628 | } 629 | } 630 | else 631 | { 632 | // last token, therefore four trailing digits, no metapattern 633 | token[0]->hash = kPatternNoMatch; 634 | } 635 | break; 636 | 637 | default: 638 | // not kPatternFourDigits, ignore it. 
639 | break; 640 | } 641 | token[0] = token[0]->next; 642 | } 643 | } 644 | 645 | void mergeNoMatch( void ) 646 | { 647 | tToken * token; 648 | tToken * nextToken; 649 | 650 | token = gTokenList.next; 651 | while ( token != NULL) 652 | { 653 | nextToken = token->next; 654 | if ( nextToken != NULL && token->hash == kPatternNoMatch && nextToken->hash == kPatternNoMatch ) 655 | { 656 | // combine the two kPatternNoMatch tokens 657 | token->next = nextToken->next; 658 | *(char *)token->end = ' '; 659 | token->end = nextToken->end; 660 | free( nextToken ); 661 | } 662 | else 663 | { 664 | token = token->next; 665 | } 666 | } 667 | 668 | /* Some tokens should also be appended as a suffix, while also retaining the token */ 669 | token = gTokenList.next; 670 | while ( token != NULL) 671 | { 672 | nextToken = token->next; 673 | if ( token->hash == kPatternNoMatch && nextToken != NULL ) 674 | { 675 | switch ( nextToken->hash ) 676 | { 677 | /* The tokens we treat as suffixes */ 678 | case kPatternCountryUK: 679 | case kPatternCountryUS: 680 | case kPatternCountryUSA: 681 | case kPatternYear: 682 | /* extend the kPatternNoMatch token to include the suffix */ 683 | *(char *)token->end = ' '; 684 | token->end = nextToken->end; 685 | break; 686 | 687 | default: 688 | break; 689 | } 690 | } 691 | token = token->next; 692 | } 693 | } 694 | 695 | int parseName( string name ) 696 | { 697 | tToken * token = gTokenList.next; 698 | 699 | tokenizeName( name ); 700 | 701 | mergeDigits(); 702 | mergeNoMatch(); 703 | 704 | debugf( 4, "%s\n", "after merging" ); 705 | token = gTokenList.next; 706 | while ( token != NULL) 707 | { 708 | debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator ); 709 | 710 | storeToken( token->hash, token->start ); 711 | token = token->next; 712 | } 713 | 714 | freeTokenList(); 715 | 716 | return 0; 717 | } 718 | 719 | 720 | /* 721 | * carve up the path into directory path, basename and extension 722 | * then pass 
basename onto parseName() to be processed 723 | */ 724 | int parsePath( string path ) 725 | { 726 | int result = 0; 727 | 728 | addParam( gFileDict, kKeywordSource, path ); 729 | 730 | string lastPeriod = strrchr( path, '.' ); 731 | string lastChar = path + strlen(path); 732 | if ( lastPeriod != NULL && (lastChar - lastPeriod) < 5 ) 733 | { 734 | addParam( gFileDict, kKeywordExtension, lastPeriod ); 735 | } 736 | else 737 | { 738 | lastPeriod = lastChar; 739 | } 740 | 741 | string lastSlash = strrchr( path, '/' ); 742 | if ( lastSlash != NULL ) 743 | { 744 | string p = strndup( path, lastSlash - path ); 745 | addParam( gFileDict, kKeywordPath, p ); 746 | free( (void *)p ); 747 | 748 | ++lastSlash; 749 | } 750 | else 751 | { 752 | lastSlash = path; // no directories prefixed 753 | } 754 | 755 | string basename = strndup( lastSlash, lastPeriod - lastSlash ); 756 | addParam( gFileDict, kKeywordBasename, basename ); 757 | parseName( basename ); 758 | free( (void *)basename ); 759 | 760 | return result; 761 | } 762 | 763 | string buildString( string template ) 764 | { 765 | string result = NULL; 766 | string t = template; 767 | char * s; // pointer into the returned string 768 | 769 | result = calloc( 1, 32768 ); 770 | s = (char *)result; 771 | 772 | if ( s != NULL ) 773 | { 774 | unsigned char c = *t++; // unsigned because it is used as an array subscript when calculating the hash 775 | while ( c != '\0' ) 776 | { 777 | unsigned long hash; 778 | string k; 779 | 780 | switch (c) 781 | { 782 | case '{': // start of keyword 783 | k = t; // remember where the keyword starts 784 | 785 | // scan the keyword and generate its hash 786 | hash = 0; 787 | 788 | c = *t++; 789 | while ( c != '\0' && c != '}' && c != '?' 
) 790 | { 791 | if ( kKeywordMap[ c ] != kKeywordSeparator ) /* we ignore some characters when calculating the hash */ 792 | { 793 | hash = fKeywordHashChar( hash, c ); 794 | } 795 | c = *t++; 796 | } 797 | 798 | if ( hash != kKeywordTemplate ) // don't want to expand a {template} keyword in a template! 799 | { 800 | string value = findParam( hash ); 801 | 802 | if ( value == NULL ) // not in the dictionaries, check for an environment variable 803 | { 804 | string envkey = strndup( k, t - k - 1 ); 805 | value = getenv( envkey ); 806 | if ( value != NULL ) 807 | { 808 | debugf( 3, "env=\"%s\", value=\"%s\"\n", envkey, value ); 809 | } 810 | free( (void *)envkey ); 811 | } 812 | 813 | if ( c != '?' ) 814 | { 815 | // end of keyword, and not the beginning of a ternary expression 816 | if ( value != NULL ) 817 | { 818 | s = stpcpy( s, value ); 819 | } 820 | } 821 | else 822 | { // ternary operator, like {param?true:false} (true or false can be absent) 823 | 824 | c = *t++; 825 | 826 | if ( value != NULL ) 827 | { 828 | // copy the 'true' clause 829 | while ( c != '}' && c != ':' && c != '\0' ) 830 | { 831 | if ( c != '@' ) 832 | { 833 | *s++ = c; 834 | } 835 | else 836 | { 837 | s = stpcpy( s, value ); 838 | } 839 | 840 | c = *t++; 841 | } 842 | 843 | if ( c == ':' ) 844 | { 845 | // skip over the 'false' clause 846 | while ( c != '\0' && c != '}' ) 847 | { 848 | c = *t++; 849 | } 850 | } 851 | } 852 | else // if undefined, skip over 'true' pattern, find the ':' (or trailing '}') 853 | { 854 | // value is undefined, so skip ahead to the false clause (or keyword end) 855 | while ( c != ':' && c != '}' && c != '\0' ) 856 | { 857 | c = *t++; 858 | } 859 | 860 | if ( c == ':' ) // did we find the 'false' clause? 
861 | { 862 | c = *t++; // yep, so swallow the colon 863 | // copy the 'false' clause into the string 864 | // no '@' processing, as the parameter is not defined 865 | while ( c != '\0' && c != '}' ) 866 | { 867 | *s++ = c; 868 | c = *t++; 869 | } 870 | } 871 | } 872 | } 873 | } // if !{template} 874 | break; 875 | 876 | case '\\': // next template character is escaped, not interpreted, e.g. \{ 877 | c = *t++; 878 | *s++ = c; 879 | break; 880 | 881 | default: 882 | *s++ = c; 883 | break; 884 | } // switch 885 | 886 | c = *t++; 887 | } 888 | 889 | *s = '\0'; // always terminate the string 890 | } 891 | return result; 892 | } 893 | 894 | int parseConfigFile( tDictionary * dictionary, string path ) 895 | { 896 | int result = 0; 897 | FILE * file; 898 | char buffer[ 4096 ]; // 4K seems like plenty 899 | 900 | if ( eaccess( path, R_OK ) != 0 ) // only attempt to parse it if there's something accessible there 901 | { 902 | // it's OK if the file is missing, otherwise complain 903 | if ( errno != ENOENT ) 904 | { 905 | fprintf( stderr, 906 | "### Error: Unable to access config file \'%s\' (%d: %s)", 907 | path, errno, strerror(errno)); 908 | result = errno; 909 | } 910 | } 911 | else 912 | { 913 | debugf( 3, "config file: \'%s\'\n", path ); 914 | 915 | file = fopen(path, "r"); 916 | if (file == NULL) 917 | { 918 | fprintf( stderr, "### Error: Unable to open config file \'%s\' (%d: %s)\n", 919 | path, errno, strerror(errno) ); 920 | result = errno; 921 | } 922 | else 923 | { 924 | while ( fgets( buffer, sizeof( buffer ), file) != NULL ) 925 | { 926 | trimTrailingWhitespace( buffer ); 927 | debugf( 4,"line: \'%s\'\n", buffer ); 928 | 929 | tHash hash = 0; 930 | string s = buffer; 931 | while (isspace(*s)) { 932 | s++; 933 | } 934 | 935 | unsigned char c = (unsigned char) *s++; 936 | if (c != '\0') { 937 | while (c != '\0' && c != '=') { 938 | if ( c != kKeywordSeparator ) { 939 | hash = fKeywordHashChar( hash, c ); 940 | } 941 | c = (unsigned char) *s++; 942 | } 943 | 944 | 
if (c == '=') { 945 | // skip over whitespace from the beginning of the value 946 | while ( isspace(*s) ) { 947 | s++; 948 | } 949 | trimTrailingWhitespace( (char *)s ); 950 | } 951 | debugf( 4,"hash = 0x%016lx, value = \'%s\'\n", hash, s); 952 | addParam( dictionary, hash, s ); 953 | } 954 | } 955 | fclose(file); 956 | } 957 | } 958 | 959 | return result; 960 | } 961 | 962 | /** 963 | * @brief Look for config files to process, and use them to update the main dictionary. 964 | * 965 | * First, look in /etc/.conf then in ~/.config/.conf, and finally the file 966 | * passed as a -c parameter, if any, then any parameters on the command line (except -c) 967 | * Where a parameter occurs more than once in a dictionary, the most recent definition 'wins' 968 | */ 969 | 970 | int parseConfig( string path ) 971 | { 972 | int result = 0; 973 | char temp[PATH_MAX]; 974 | 975 | snprintf( temp, sizeof( temp ), "/etc/%s.conf", gMyName ); 976 | debugf( 4, "/etc path: \"%s\"\n", temp ); 977 | 978 | result = parseConfigFile( gMainDict, temp ); 979 | 980 | if ( result == 0 ) 981 | { 982 | string home = getenv("HOME"); 983 | if ( home == NULL) 984 | { 985 | home = getpwuid( getuid() )->pw_dir; 986 | } 987 | if ( home != NULL ) 988 | { 989 | snprintf( temp, sizeof( temp ), "%s/.config/%s.conf", home, gMyName ); 990 | debugf( 4, "~ path: \"%s\"\n", temp ); 991 | 992 | result = parseConfigFile( gMainDict, temp ); 993 | } 994 | } 995 | 996 | if ( result == 0 && path != NULL ) 997 | { 998 | struct stat fileStat; 999 | 1000 | if ( stat( path, &fileStat ) != 0 ) 1001 | { 1002 | fprintf( stderr, "### Error: config path '%s' is not valid (%d: %s)\n", 1003 | path, errno, strerror(errno) ); 1004 | result = -1; 1005 | } 1006 | switch ( fileStat.st_mode & S_IFMT ) 1007 | { 1008 | case S_IFDIR: 1009 | snprintf( temp, sizeof( temp ), "%s/%s.conf", path, gMyName ); 1010 | break; 1011 | 1012 | case S_IFLNK: 1013 | case S_IFREG: 1014 | strncpy( temp, path, sizeof( temp ) ); 1015 | break; 1016 | 1017 | 
default: 1018 | fprintf( stderr, "### Error: config path '%s' is neither a file nor directory.\n", path ); 1019 | result = -1; 1020 | break; 1021 | } 1022 | 1023 | if ( result == 0 ) 1024 | { 1025 | debugf( 4, "-c path: %s\n", temp ); 1026 | result = parseConfigFile( gMainDict, temp ); 1027 | } 1028 | } 1029 | 1030 | return result; 1031 | } 1032 | 1033 | /** 1034 | * @brief recurive function to walk the path looking for config files 1035 | * @param gFileDict 1036 | * @param path 1037 | */ 1038 | void _recurseConfig( tDictionary * dictionary, string path ) 1039 | { 1040 | char temp[PATH_MAX]; 1041 | 1042 | if ( strlen(path) != 1 || (path[0] != '/' && path[0] != '.')) 1043 | { 1044 | strncpy( temp, path, sizeof( temp ) ); 1045 | _recurseConfig( dictionary, dirname( temp ) ); 1046 | /* check for a config file & if found, parse it */ 1047 | debugf( 4, "recurse = \'%s\'\n", path ); 1048 | snprintf( temp, sizeof(temp), "%s/%s.conf", path, gMyName ); 1049 | parseConfigFile( dictionary, temp ); 1050 | } 1051 | } 1052 | 1053 | /** 1054 | Traverse the path to the source file, looking for config files. 1055 | Apply them in the reverse order, so ones lower in the hierarchy 1056 | can override parameters defined in higher ones. 
1057 | */ 1058 | int processConfigPath( string path ) 1059 | { 1060 | int result = 0; 1061 | char temp[PATH_MAX]; 1062 | char * absolute; 1063 | 1064 | /* dirname may modify its argument, so make a copy first */ 1065 | strncpy( temp, path, sizeof(temp) ); 1066 | absolute = realpath( dirname(temp), NULL ); 1067 | if ( absolute == NULL ) 1068 | { 1069 | fprintf( stderr, "### Error: path \'%s\' appears to be invalid (%d: %s).\n", 1070 | path, errno, strerror(errno) ); 1071 | return -5; 1072 | } 1073 | else 1074 | { 1075 | debugf( 3, "abs = %s, cached = %s\n", absolute, gCachedPath ); 1076 | if ( gCachedPath == NULL || strcmp( gCachedPath, absolute ) != 0 ) 1077 | { 1078 | debugf( 3, "absolute = \'%s\'\n", absolute ); 1079 | emptyDictionary( gPathDict ); 1080 | gCachedPath = absolute; 1081 | _recurseConfig( gPathDict, absolute ); 1082 | } 1083 | 1084 | /* we may have picked up a new definition of {destination} as 1085 | * a result of parsing different config files. If so, we need 1086 | * to rebuild gSeriesDict to reflect the new destination */ 1087 | 1088 | string destination = findParam( kKeywordDestination ); 1089 | 1090 | if ( destination == NULL) 1091 | { 1092 | fprintf( stderr, "### Error: no destination defined.\n" ); 1093 | result = -3; 1094 | } 1095 | else 1096 | { 1097 | if ( gCachedSeries == NULL || strcmp( gCachedSeries, destination ) != 0 ) 1098 | { 1099 | debugf( 2, "destination = \'%s\'\n", destination ); 1100 | // fill the dictionary with hashes of the directory names in the destination 1101 | emptyDictionary( gSeriesDict ); 1102 | gCachedSeries = destination; 1103 | buildSeriesDictionary( destination ); 1104 | } 1105 | } 1106 | } 1107 | return result; 1108 | } 1109 | 1110 | int processFile( string path ) 1111 | { 1112 | int result = 0; 1113 | 1114 | processConfigPath( path ); 1115 | 1116 | parsePath( path ); 1117 | 1118 | printDictionary( gFileDict ); 1119 | 1120 | string template = findParam( kKeywordTemplate ); 1121 | 1122 | if ( template == NULL) 
1123 | { 1124 | fprintf( stderr, "### Error: no template found.\n" ); 1125 | result = -2; 1126 | } 1127 | else 1128 | { 1129 | debugf( 2, "template = \'%s\'\n", template ); 1130 | 1131 | string output = buildString( template ); 1132 | string exec = findParam( kKeywordExecute ); 1133 | if ( exec != NULL) 1134 | { 1135 | result = system( output ); 1136 | } 1137 | else 1138 | { 1139 | printf( "%s\n", output ); 1140 | } 1141 | free( (void *)output ); 1142 | } 1143 | emptyDictionary( gFileDict ); 1144 | return result; 1145 | } 1146 | 1147 | string usage = 1148 | "Command Line Options\n" 1149 | " -d set {destination} parameter\n" 1150 | " -t set {template} paameter\n" 1151 | " -x pass each output string to the shell to execute\n" 1152 | " -- read from stdin\n" 1153 | " -0 stdin is null-terminated (also implies '--' option)\n" 1154 | " -v set the level of verbosity (debug info)\n"; 1155 | 1156 | 1157 | int main( int argc, string argv[] ) 1158 | { 1159 | int result; 1160 | int cnt; 1161 | string configPath = NULL; 1162 | time_t secsSinceEpoch; 1163 | struct tm *timeStruct; 1164 | 1165 | gMainDict = createDictionary( "Main" ); 1166 | gSeriesDict = createDictionary( "Series" ); 1167 | gPathDict = createDictionary( "Path" ); 1168 | gFileDict = createDictionary( "File" ); 1169 | 1170 | gMyName = basename( strdup( argv[0] ) ); // posix flavor of basename modifies its argument 1171 | 1172 | secsSinceEpoch = time( NULL ); 1173 | timeStruct = localtime( &secsSinceEpoch ); 1174 | if ( timeStruct != NULL ) 1175 | { 1176 | gNextYear = timeStruct->tm_year + 1900 + 1; 1177 | } 1178 | 1179 | int k = 1; 1180 | cnt = argc; 1181 | for ( int i = 1; i < argc; i++ ) 1182 | { 1183 | debugf( 4, "a: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[ i ] ); 1184 | 1185 | // is it the config file option? 
1186 | if ( strcmp( argv[ i ], "-c" ) == 0 ) 1187 | { 1188 | cnt -= 2; 1189 | ++i; 1190 | configPath = strdup( argv[ i ] ); // make a copy - argv will be modified 1191 | } 1192 | else 1193 | { 1194 | if ( i != k ) 1195 | { 1196 | argv[ k ] = argv[ i ]; 1197 | } 1198 | ++k; 1199 | } 1200 | } 1201 | argc = cnt; 1202 | 1203 | result = parseConfig( configPath ); 1204 | 1205 | if ( configPath != NULL ) 1206 | { 1207 | free( (void *)configPath ); 1208 | configPath = NULL; 1209 | } 1210 | 1211 | k = 1; 1212 | for ( int i = 1; i < argc && result == 0; i++ ) 1213 | { 1214 | debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] ); 1215 | 1216 | // is it an option? 1217 | if (argv[i][0] == '-' ) 1218 | { 1219 | char option = argv[i][1]; 1220 | if ( argv[i][2] != '\0' ) 1221 | { 1222 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] ); 1223 | fprintf( stderr, "%s", usage ); 1224 | result = -1; 1225 | } 1226 | else 1227 | { 1228 | --cnt; 1229 | 1230 | switch ( option ) 1231 | { 1232 | // case 'c': // config file already handled 1233 | // break; 1234 | 1235 | case 'd': // destination 1236 | addParam( gMainDict, kKeywordDestination, argv[ i ] ); 1237 | --cnt; 1238 | ++i; 1239 | break; 1240 | 1241 | case 't': // template 1242 | addParam( gMainDict, kKeywordTemplate, argv[ i ] ); 1243 | --cnt; 1244 | ++i; 1245 | break; 1246 | 1247 | case 'x': // execute 1248 | addParam( gMainDict, kKeywordExecute, "yes" ); 1249 | break; 1250 | 1251 | case '-': // also read lines from stdin 1252 | addParam( gMainDict, kKeywordStdin, "yes" ); 1253 | break; 1254 | 1255 | case '0': // entries from stdio are terminated with NULLs 1256 | addParam( gMainDict, kKeywordStdin, "yes" ); 1257 | addParam( gMainDict, kKeywordNullTermination, "yes" ); 1258 | break; 1259 | 1260 | case 'v': // verbose output, i.e. 
show debug logging 1261 | if ( i < argc - 1 ) 1262 | { 1263 | ++i; 1264 | --cnt; 1265 | 1266 | gDebugLevel = atoi( argv[i] ); 1267 | fprintf(stderr, "verbosity = %d\n", gDebugLevel ); 1268 | } 1269 | break; 1270 | 1271 | default: 1272 | ++cnt; 1273 | --i; // point back at the original option 1274 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] ); 1275 | fprintf( stderr, "%s", usage ); 1276 | result = -1; 1277 | break; 1278 | } 1279 | } 1280 | } 1281 | else 1282 | { 1283 | if ( i != k ) 1284 | { 1285 | argv[k] = argv[i]; 1286 | } 1287 | ++k; 1288 | } 1289 | } 1290 | argc = cnt; 1291 | 1292 | /* printDictionary( mainDict ); */ 1293 | 1294 | for ( int i = 1; i < argc; i++ ) 1295 | { 1296 | debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] ); 1297 | 1298 | // is it an option? 1299 | if ( argv[i][0] == '-' ) 1300 | { 1301 | char option = argv[i][1]; 1302 | if ( argv[i][2] != '\0' ) 1303 | { 1304 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] ); 1305 | result = -1; 1306 | } 1307 | else 1308 | { 1309 | --cnt; 1310 | 1311 | switch ( option ) 1312 | { 1313 | case 'd': // destination 1314 | addParam( gMainDict, kKeywordDestination, argv[i] ); 1315 | --cnt; 1316 | ++i; 1317 | break; 1318 | 1319 | case 't': // template 1320 | addParam( gMainDict, kKeywordTemplate, argv[i] ); 1321 | --cnt; 1322 | ++i; 1323 | break; 1324 | 1325 | case 'x': // execute 1326 | addParam( gMainDict, kKeywordExecute, "yes" ); 1327 | break; 1328 | 1329 | case '-': // also read lines from stdin 1330 | addParam( gMainDict, kKeywordStdin, "yes" ); 1331 | break; 1332 | 1333 | case '0': // entries from stdio are terminated with NULLs 1334 | addParam( gMainDict, kKeywordNullTermination, "yes" ); 1335 | break; 1336 | 1337 | case 'v': //verbose output, i.e. 
debug logging 1338 | if ( i < argc - 1 ) 1339 | { 1340 | ++i; 1341 | --cnt; 1342 | 1343 | gDebugLevel = atoi( argv[i] ); 1344 | fprintf( stderr, "verbosity = %d\n", gDebugLevel ); 1345 | } 1346 | break; 1347 | 1348 | default: 1349 | ++cnt; 1350 | --i; // point back at the original option 1351 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] ); 1352 | result = -1; 1353 | break; 1354 | } 1355 | } 1356 | } 1357 | else 1358 | { 1359 | if ( i != k ) 1360 | { 1361 | argv[k] = argv[i]; 1362 | } 1363 | ++k; 1364 | } 1365 | } 1366 | argc = cnt; 1367 | 1368 | printDictionary( gMainDict ); 1369 | 1370 | for ( int i = 1; i < argc && result == 0; ++i ) 1371 | { 1372 | debugf( 4, "%d: \'%s\'\n", i, argv[ i ] ); 1373 | processFile( argv[i] ); 1374 | } 1375 | 1376 | // should we also read from stdin? 1377 | if ( findParam( kKeywordStdin ) != NULL ) 1378 | { 1379 | char line[PATH_MAX]; 1380 | 1381 | if ( findParam( kKeywordNullTermination ) != NULL ) 1382 | { 1383 | // ...therefore lines are terminated by \0 1384 | char * p = line; 1385 | cnt = sizeof( line ); 1386 | 1387 | while (!feof(stdin)) 1388 | { 1389 | char c = fgetc( stdin ); 1390 | *p++ = c; 1391 | cnt--; 1392 | 1393 | if ( c == '\0' || cnt < 1 ) 1394 | { 1395 | debugf( 4, "null: %s\n", line ); 1396 | processFile( line ); 1397 | 1398 | p = line; 1399 | cnt = sizeof( line ); 1400 | } 1401 | } 1402 | } 1403 | else 1404 | { 1405 | while (!feof(stdin)) 1406 | { 1407 | // ...otherwise lines are terminated by \n 1408 | fgets( line, sizeof(line), stdin ); 1409 | 1410 | // lop off the inevitable trailing newline(s)/whitespace 1411 | trimTrailingWhitespace( line ); 1412 | debugf( 4,"eol: %s\n", line); 1413 | processFile( line); 1414 | } 1415 | } 1416 | } 1417 | 1418 | // all done, clean up. 
1419 | destroyDictionary( gFileDict ); 1420 | destroyDictionary( gPathDict ); 1421 | destroyDictionary( gSeriesDict ); 1422 | destroyDictionary( gMainDict ); 1423 | 1424 | return result; 1425 | } 1426 | -------------------------------------------------------------------------------- /dvr2plex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Paul on 4/4/2019. 3 | // 4 | 5 | #ifndef DVR2PLEX_H 6 | #define DVR2PLEX_H 7 | 8 | #if CMAKE_BUILD_TYPE == Debug 9 | #define DEBUG 1 10 | #endif 11 | 12 | typedef const char * string; 13 | 14 | extern int gDebugLevel; 15 | #define debugf( level, format, ... ) do { if (gDebugLevel >= level) fprintf( stderr, format, __VA_ARGS__ ); } while (0) 16 | 17 | #endif // DVR2PLEX_H 18 | -------------------------------------------------------------------------------- /keywords.hash: -------------------------------------------------------------------------------- 1 | // 2 | // Created by paul on 1/14/20. 3 | // 4 | 5 | prefix = "Keyword" 6 | 7 | # the character mapping array 8 | # array starts out mapping one-to-one, i.e. input = output 9 | # 10 | mappings: 11 | { 12 | # make the generated hashes case-insensitive 13 | ignoreCase = true 14 | 15 | # mark these characters as 'kKeywordSeparator' 16 | Separator = " ._-" 17 | 18 | Ignored = "?!" 
19 | 20 | # mark the different styles of brackets as equivalent 21 | LBracket = "({[" 22 | RBracket = ")}]" 23 | } 24 | 25 | # the strings to hash into the enum 26 | # if there's a comma, first string is for symbol, second is to hash 27 | # 28 | keywords = [ 29 | "Basename", 30 | "Country", 31 | "DateRecorded", 32 | "DestSeries", 33 | "Destination", 34 | "Episode", 35 | "Execute", 36 | "Extension", 37 | "FirstAired", 38 | "NullTermination", 39 | "Path", 40 | "Season", 41 | "SeasonFolder", 42 | "Series", 43 | "Source", 44 | "Stdin", 45 | "Template", 46 | "Title", 47 | "Year" 48 | ] -------------------------------------------------------------------------------- /patterns.hash: -------------------------------------------------------------------------------- 1 | // 2 | // Created by paul on 1/14/20. 3 | // 4 | 5 | prefix = "Pattern" 6 | 7 | # the character mapping array 8 | # array starts out mapping one-to-one, i.e. input = output 9 | # 10 | mappings: 11 | { 12 | # make the generated hashes case-insensitive 13 | ignoreCase = true 14 | 15 | # mark these characters as 'kPatternSeperator' 16 | Seperator = " ._-" 17 | 18 | # mark this range as 'kPatternDigit' 19 | Digit = "0-9" 20 | 21 | # mark the different styles of brackets as equivalent 22 | LBracket = "({[" 23 | RBracket = ")}]" 24 | } 25 | 26 | # the strings to hash into the enum 27 | # if there's a comma, first string is for symbol, second is to hash 28 | # 29 | keywords = [ 30 | "SnnEnn,S00E00", 31 | "SyyyyEnn,S0000E00", 32 | "SnnEn,S00E0", 33 | "SnEnn,S0E00", 34 | "SnEn,S0E0", 35 | "Ennn,E000", 36 | "Ennnn,E0000", 37 | "nXnn,0x00", 38 | "nnXnn,00x00", 39 | "TwoDigits,00", 40 | "FourDigits,0000", 41 | "SixDigits,000000", 42 | "EightDigits,00000000", 43 | "CountryUSA,(USA)", 44 | "CountryUS,(US)", 45 | "CountryUK,(UK)", 46 | "Year,(0000)" 47 | ] 48 | --------------------------------------------------------------------------------