├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── dictionary.c
├── dictionary.h
├── docs
    ├── README.md
    └── _config.yml
├── dvr2plex.c
├── dvr2plex.h
├── keywords.hash
└── patterns.hash


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Object files
 5 | *.o
 6 | *.ko
 7 | *.obj
 8 | *.elf
 9 | 
10 | # Linker output
11 | *.ilk
12 | *.map
13 | *.exp
14 | 
15 | # Precompiled Headers
16 | *.gch
17 | *.pch
18 | 
19 | # Libraries
20 | *.lib
21 | *.a
22 | *.la
23 | *.lo
24 | 
25 | # Shared objects (inc. Windows DLLs)
26 | *.dll
27 | *.so
28 | *.so.*
29 | *.dylib
30 | 
31 | # Executables
32 | *.exe
33 | *.out
34 | *.app
35 | *.i*86
36 | *.x86_64
37 | *.hex
38 | 
39 | # Debug files
40 | *.dSYM/
41 | *.su
42 | *.idb
43 | *.pdb
44 | 
45 | # Kernel Module Compile Results
46 | *.mod*
47 | *.cmd
48 | .tmp_versions/
49 | modules.order
50 | Module.symvers
51 | Mkfile.old
52 | dkms.conf
53 | 
54 | /dvr2plex
55 | .idea/
56 | cmake-*/
57 | keywords.h
58 | patterns.h
59 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.10)
 2 | 
 3 | project(DVR2Plex)
 4 | set( CMAKE_BUILD_TYPE DEBUG )
 5 | 
 6 | set( CMAKE_C_STANDARD 11 )
 7 | set( CMAKE_C_FLAGS    "-Wall -Wextra" )
 8 | 
 9 | include_directories(.)
10 | 
11 | file(GLOB HASHES "*.hash")
12 | set(OUTFILES)
13 | foreach(HASH ${HASHES})
14 | 
15 |     string(REGEX REPLACE "(.*).hash$" "\\1.h" OUTPUT_FILE_NAME ${HASH})
16 | 
17 |     add_custom_command(
18 |       OUTPUT "${OUTPUT_FILE_NAME}"
19 |       COMMAND hashstrings ${HASH}
20 |       DEPENDS "${HASH}")
21 | 
22 |     set(OUTFILES ${OUTFILES} "${OUTPUT_FILE_NAME}")
23 | 
24 | endforeach(HASH)
25 | 
26 | add_custom_target(hashes ALL DEPENDS ${OUTFILES})
27 | 
28 | add_executable( DVR2Plex dvr2plex.c dvr2plex.h dictionary.c dictionary.h)
29 | target_link_libraries( DVR2Plex "/usr/lib/x86_64-linux-gnu/libdl.so" )
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Paul Chambers
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/68d3bc77c19b400693c30f07f6fe0fdf)](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=paul-chambers/DVR2Plex&amp;utm_campaign=Badge_Grade)
  2 | 
  3 | [Full Documentation](https://paul-chambers.github.io/DVR2Plex/)
  4 | 
  5 | # DVR2Plex
  6 | 
  7 | This tool uses some fancy text processing techniques to reformat a filename
  8 | into another one. To be useful, this tool needs to be used with other Linux 
  9 | command line tools, e.g. to copy (or hardlink) files to their new location.
 10 | 
 11 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, I've included
 12 | a simple utility called **mkln**. I'd recommend using it, at least initially,
 13 | as it's *safe* - it won't replace an existing file in the destination,
 14 | (will append a number inside braces to avoid the name collision), and
 15 | automatically creates directories specified by the target path that don't 
 16 | yet exist.
 17 | 
 18 | **Caution:** You must accept responsibility for your configuration and use
 19 | of this tool, and accept that data loss is a possibility. Please be careful
 20 | when using this tool.
 21 | 
 22 | **Note:** this tool was written for a Linux environment. It *should* work
 23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing
 24 | there.
 25 | 
 26 | ## Why does this exist?
 27 | 
 28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to
 29 | supply content for the Plex content library. Plex has a preferred way it
 30 | likes to see the library organized, and things generally go more smoothly
 31 | if everything uses the same organization and naming strategy.
 32 | 
 33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/), 
 34 | which is well implemented and has some features that I find particularly
 35 | useful. It keeps its recordings in a private directory, and while it is
 36 | well-organized, it's in a way that's a little different to the structure
 37 | that Plex prefers. More importantly, Channels DVR 'owns' the files in
 38 | that folder, and other software should respect that, and not 'pull the rug'
 39 | out from under Channels DVR by messing with those files behind its back.
 40 | 
 41 | You could point Plex at the Channels directories holding the
 42 | recordings, and Plex will figure things out. However, it should treat
 43 | those directory contents as read-only, otherwise Plex will be altering
 44 | files that Channels owns.
 45 |  
 46 | I initially wrote a shell script that hardlinked the recordings Channels
 47 | DVR made in its private directory into the 'right place' in my Plex
 48 | library. A 'hard link' doesn't use any more disk space, and has the
 49 | positive attribute that the hard link in the Plex Library can be moved
 50 | and/or renamed without affecting the one in the Channels DVR 'private'
 51 | directory. The inverse is also true - the Channels DVR can delete its
 52 | file in the 'private' directory, without affecting the other link to it
 53 | in the Plex library, so it will remain. This is very handy when used
 54 | with the 'only keep *n* episodes' in Channels DVR (or
 55 | [kmttg](https://sourceforge.net/projects/kmttg) and
 56 | [jellyfin](https://jellyfin.org/) for that matter).
 57 | 
 58 | *Problem solved, right?* Well, mostly...
 59 | 
 60 | The biggest issue with such a simple approach is that the world 
 61 | hasn't yet settled on how a series is named, and possibly never will.
 62 | For example, a series like "Marvel's Agents of S.H.I.E.L.D." there
 63 | are a number of variations on that title that you'll see in the wild.
 64 | Variations like "Marvel's Agents of S.H.I.E.L.D. (2013)", or 
 65 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period),
 66 | all the way to "marvels.agents.of.shield".
 67 |  
 68 | If that isn't accounted for, then differently-named directories will
 69 | accumulate, containing episodes of the same series, often duplicates.
 70 | This only gets worse as the number of content sources increases.
 71 | 
 72 | Not good. This is the itch this tool scratches.
 73 | 
 74 | ## OK, but what does it *do?*
 75 | 
 76 | In a nutshell, it's a specialized string manipulation tool. You feed it
 77 | the name of a media file, it parses out the series, season, episode,
 78 | episode title, etc. from the name, and provides a 'template' system 
 79 | that allows you to easily reassemble a new name for the destination
 80 | from the parts it extracted from the source filename.
 81 | 
 82 | *That sounds like something you could do with `sed` or `awk`. So why
 83 | write this?*
 84 | 
 85 | The pattern matching is done in a loose/fuzzy way that would be 
 86 | impractical to do in bash script or command-line string manipulation
 87 | tools.
 88 | 
 89 | It uses a fundamentally different technique - character-mapped hashing
 90 | - than the usual simple character-by-character string comparison or
 91 | regular expression methods.
 92 |  
 93 | See "How does it work?" below, if you're curious about the details.
 94 | 
 95 | The 'template' describes the form of the string that this tool should
 96 | output. The component parts are substituted in the appropriate place
 97 | where you put something like "{episode}". There are quite a number of
 98 | these parameters:
 99 | 
100 | | Parameter Name | Description                                                                     |
101 | |---             |---                                                                              |
102 | | {source}       | The path to the source file, as passed to this tool.                            |
103 | | {path}         | The 'dirname' part of the source (no trailing slash)                            |
104 | | {basename}     | The 'basename' of the source (minus the extension)                              |
105 | | {extension}    | The extension. Separated so that if what you want to do is convert containers, you can write something like {path}/{basename}.mkv as the destination in the template |
106 | | {series}       | The raw name of the series (as extracted from the source)                       |
107 | | {season}       | Always at least two digits, zero-padded                                         |
108 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise "Season {season}"     |
109 | | {episode}      | Always at least two digits, zero-padded                                         |
110 | | {title}        | The episode title                                                               |
111 | | {destseries}   | This is the target folder that the tool determined (by fuzzy match) is the right destination for the file.<br> More details below. |
112 | | {destination}  | The destination directory for the file. Also scanned as part of the fuzzy matching | 
113 | | {firstaired}   | The date this episode first aired *(specific to Channels DVR files)*            |
114 | | {daterecorded} | The date/time Channels DVR recorded this *(specific to Channels DVR files)*     |
115 | | {template}     | It's a parameter too (though you can't use it in a template, obviously)         |
116 | 
117 | This is only the predefined list of parameters that the parsing will
118 | pre-populate automatically - except for {destination} and {template},
119 | which need to be defined by the user. They can either be defined on the 
120 | command line, or in a config file - the tool looks for
121 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will
122 | process the config file defined by the `-c` command line option, before
123 | finishing with any command line options. Parameters can be defined
124 | multiple times; the last one wins. So you could, for example, define a
125 | default {title} as '(unknown)' in a config file, and it would be used
126 | if the file name parsing didn't find an episode title.
127 | 
128 | DVR2Plex also looks for config files in the filesystem hierarchy above
129 | the source file. This is particularly useful for providing different
130 | templates and destinations for **TV** vs. **Movie** recordings, or
131 | handling a system running multiple DVR software (as I am). Obviously
132 | this check is made for each directory that DVR2Plex is asked to process
133 | files within, and so if found, these config files have 'the last say' in
134 | setting parameters, overriding everything else.
135 | 
136 | You may also define your own parameters in the config file, and use them
137 | in the template. And if the output-building code can't find a parameter
138 | name that matches in its dictionaries, it will also look for an
139 | environment variable with that name (case-sensitive, in this case). So
140 | {HOME} will be replaced by the path to the user's home directory (i.e.
141 | the equivalent of '~' in the shell)
142 | 
143 | The assumption is that at least one of the config files would contain
144 | at least the {destination} and {template} parameters, since those are
145 | likely to be consistent on a given machine.
146 |  
147 | For example, `/etc/DVR2Plex.conf` might contain:
148 | ```
149 | destination = /home/video/TV
150 | template = mkln "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title? @}{extension}"
151 | ```
152 | So assuming that the source file was 
153 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg` 
154 | and a directory existed called `/home/video/TV/Person of Interest (2011)`
155 | then that template would output (or execute):
156 |  
157 |  `mkln "/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
158 |  
159 | but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg`
160 | would also create the same destination: 
161 |   `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
162 | 
163 | **Caution** DVR2Plex will blindly execute whatever you tell it to execute. It
164 | is just manipulating strings, after all. It has no notion of the quality of
165 | the source file vs. an existing destination file, and will happily overwrite
166 | a high quality file with a lower quality one if that's what you tell it to do.
167 | 
168 | Be aware of this when creating a template you expect DVR2Plex to execute directly.
169 | 
170 | ### Conditional Expansions
171 | *But wait, what on earth does {episode?E@:-} mean?*
172 | 
173 | When a parameter isn't defined, it expands to nothing. Which is all well
174 | and good, except if there's some surrounding characters that need to
175 | disappear too. {episode?E@:-} means 'if {episode} is defined, output 'E'
176 | followed by the contents of {episode}, otherwise output just '-'.
177 | Similarly {seasonfolder} would normally be seen in a template written
178 | as {seasonfolder?@/} so that the path separator is only included if 
179 | {seasonfolder} is defined.
180 | 
181 | This is akin to the ternary operator in C, if you're a programmer -
182 | up to the '?' is the thing to test, after the '?' and before the ':'
183 | or '}' is the string to output if it is defined ('true'), between the
184 | ':' and '}' is the string to output if it isn't defined ('false').
185 | Where an '@' appears, insert the value of the parameter.
186 | 
187 | ## How does it work?
188 | 
189 | The tool uses modified hashing to do comparisons. The hashing is
190 | modified by mapping each character through a mapping table first, so
191 | that particular characters can be mapped to another, or ignored
192 | completely. For example, upper case characters are mapped to lower case,
193 | so "UPPER" has the same hash as "upper" and "UpPeR"
194 | 
195 | DVR2Plex first builds up a list of hashes for the directories
196 | found in the {destination} directory.
197 | 
198 | The matching algorithm isn't confused by differing case, missing
199 | apostrophes, presence or absence of a year or country (e.g.
200 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in
201 | the destination).
202 | 
203 | This is particularly useful for the worst offenders. For example, if you
204 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)",
205 | the fuzzy matching can deal with something in the source like
206 | "marvels.agents.of.shield" and still put it in the correct folder.
207 | 
208 | This fixes all-lowercase series for example, or random "Of"/"of" confusion
209 | (or "MythBusters" vs. "Mythbusters")
210 | 
211 | Some characters are often dropped, like apostrophes or the trailing
212 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's"
213 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T."
214 | 
215 | All digits are mapped to '0' (though only for the pattern matching), so
216 | we have a constant hash for patterns like S02E10, S01E05, etc. This
217 | allows us to easily find the several patterns we're looking for, very
218 | efficiently. Those patterns are mostly season/episode patterns: SnnEnn,
219 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify
220 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR
221 | recordings, along with nnnn-nn-nn-nnnn for the date recorded.
222 | 
223 | There are in fact two character maps used for hashing. One for finding
224 | patterns, and another for looking up parameters. The main difference is
225 | that the parameter mapping doesn't map all digits to zero.
226 | 
227 | Internally, there are three 'dictionaries' used for searching, each is 
228 | a simple key-value list, using the hash as the key. The 'main'
229 | dictionary contains all the parameters that will remain the same for
230 | the entire run. This is the dictionary populated from the config files
231 | and command line options. There is a 'file' dictionary, which contains
232 | the parsed value for the last source parsed. This is discarded and
233 | rebuilt for each source, so per-file values don't carry over from one
234 | source to the next.
235 | 
236 | The third dictionary is the 'series' one, which is populated with hashes
237 | of the directory names that it finds by doing a scan of the {destination}
238 | directory. The assumption is that these are essentially the canonical
239 | names/destinations for the known TV series. For a given series directory,
240 | there may be more than one dictionary entry, as the hash without the
241 | year or country is included, as well as with it included. So the hash
242 | for "Person of Interest" is stored as well as "Person of Interest (2011)",
243 | both pointing to the target "Person of Interest (2011)" directory.
244 | Thus either form will match, and point to the right destination directory.
245 | This is the mechanism behind the {destseries} parameter.
246 | 


--------------------------------------------------------------------------------
/dictionary.c:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by root on 8/22/19.
 3 | //
 4 | #include "dvr2plex.h"
 5 | #include <stdlib.h>
 6 | #include <stdio.h>
 7 | #include <string.h>
 8 | 
 9 | #include "dictionary.h"
10 | 
11 | tDictionary * createDictionary( const char * name )
12 | {
13 |     tDictionary * result = (tDictionary *)calloc( 1, sizeof(tDictionary) );
14 |     result->name = name;
15 |     return result;
16 | }
17 | 
18 | void emptyDictionary( tDictionary * dictionary )
19 | {
20 | 	tParam * p = dictionary->head;
21 | 	dictionary->head = NULL;
22 | 
23 | 	while ( p != NULL)
24 | 	{
25 | 		if ( p->value != NULL )
26 | 		{
27 | 			free( (void *) p->value );
28 | 		}
29 | 
30 | 		tParam * next = p->next;
31 | 		free( p );
32 | 		p = next;
33 | 	}
34 | }
35 | 
36 | void destroyDictionary( tDictionary * dictionary )
37 | {
38 |     emptyDictionary( dictionary );
39 |     free( dictionary );
40 | }
41 | 
42 | void printDictionary( tDictionary * dictionary )
43 | {
44 |     if ( dictionary != NULL )
45 |     {
46 |         debugf( 3, "...%s dictionary...\n", dictionary->name);
47 | 
48 |         tParam * p = dictionary->head;
49 |         while ( p != NULL )
50 |         {
51 |             debugf( 3, "%16s: \"%s\"\n", lookupHash(p->hash), p->value );
52 |             p = p->next;
53 |         }
54 |     }
55 | }
56 | 
57 | int addParam( tDictionary * dictionary, tHash hash, const char * value )
58 | {
59 |     int result = -1;
60 | 
61 |     tParam * p = malloc( sizeof(tParam) );
62 | 
63 |     if (p != NULL)
64 |     {
65 |         p->hash  = hash;
66 |         p->value = strdup( value );
67 | 
68 |         p->next = dictionary->head;
69 |         dictionary->head = p;
70 | 
71 |         result = 0;
72 |     }
73 |     return result;
74 | }
75 | 
76 | string findValue( tDictionary * dictionary, tHash hash )
77 | {
78 |     string result = NULL;
79 |     tParam * p = dictionary->head;
80 | 
81 |     while (p != NULL)
82 |     {
83 |         if ( p->hash == hash )
84 |         {
85 |             result = p->value;
86 |             break;
87 |         }
88 |         p = p->next;
89 |     }
90 |     return result;
91 | }
92 | 


--------------------------------------------------------------------------------
/dictionary.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by root on 8/22/19.
 3 | //
 4 | 
 5 | #ifndef DVR2PLEX_DICTIONARY_H
 6 | #define DVR2PLEX_DICTIONARY_H
 7 | 
 8 | typedef unsigned long tHash;
 9 | 
10 | typedef struct tParam {
11 |     struct tParam * next;
12 |     const char    * value;
13 |     tHash           hash;
14 | } tParam;
15 | 
16 | typedef struct {
17 |     tParam     * head;
18 |     const char * name;
19 | } tDictionary;
20 | 
21 | tDictionary *  createDictionary( const char * name );
22 |          void  emptyDictionary( tDictionary * dictionary );
23 |          void  destroyDictionary( tDictionary * dictionary );
24 |        string  lookupHash( tHash );
25 |          void  printDictionary( tDictionary * dictionary);
26 |           int  addParam( tDictionary * dictionary, tHash hash, string value );
27 |        string  findValue( tDictionary * dictionary, tHash hash );
28 | 
29 | #endif // DVR2PLEX_DICTIONARY_H
30 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
  1 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/68d3bc77c19b400693c30f07f6fe0fdf)](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=paul-chambers/DVR2Plex&amp;utm_campaign=Badge_Grade)
  2 | 
  3 | # DVR2Plex
  4 | 
  5 | **Caution:** If you are new to the linux command line, and/or are unfamiliar
  6 | with common linux tools like 'find', I wouldn't recommend this as a good first
  7 | project because of the danger of overwriting existing files.
  8 | 
  9 | This tool uses some fancy text processing techniques to reformat a filename
 10 | into another one. To be useful, this tool needs to be used with other Linux 
 11 | command line tools, e.g. to copy (or hardlink) files to their new location.
 12 | 
 13 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, it
 14 | cannot prevent the tool doing the copy from blindly overwriting an existing
 15 | high-quality file with a lower quality one. Thus it's best to have your
 16 | template generate a destination filename that won't overwrite any existing
 17 | files.
 18 | 
 19 | You must accept responsibility for your configuration and use of this tool,
 20 | and accept that data loss is a possibility. Be careful when using this tool.
 21 | 
 22 | **Note:** this tool was written for a Linux environment. It *should* work
 23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing
 24 | there.
 25 | 
 26 | ## Why does this exist?
 27 | 
 28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to
 29 | supply content for the Plex content library. Plex has a preferred way it
 30 | likes to see the library organized, and things generally go more smoothly
 31 | if everything uses the same organization and naming strategy.
 32 | 
 33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/), 
 34 | which is well implemented and has some features that I find
 35 | particularly useful. It keeps its recordings in a private directory, and
 36 | while it is well-organized, it's in a way that's a little different to
 37 | the structure that Plex prefers. Most importantly, Channels DVR 'owns'
 38 | the files in that folder, and other software must respect that, and not
 39 | 'pull the rug out' from under Channels by messing with those files.
 40 | 
 41 | You could point Plex at the Channels directories holding the
 42 | recordings, and Plex will figure things out. But it should treat those 
 43 | directory contents as read-only, otherwise Plex will be altering files
 44 | that Channels owns.
 45 |  
 46 | *So,* I initially wrote a shell script that hardlinked the recordings
 47 | Channels DVR made in its private directory into the 'right place' in
 48 | my Plex library. A hard link doesn't use any more disk space, but does
 49 | mean the hard link in the Plex Library can be moved and/or renamed
 50 | without affecting the one in the Channels DVR 'private' directory.
 51 | The inverse is also true - the Channels DVR can delete its file in
 52 | the 'private' directory, but the other link to it in the Plex library
 53 | will remain. Which is very handy if you want to tell Channels DVR to
 54 | 'only keep *n* episodes' (or [kmttg](https://sourceforge.net/projects/kmttg),
 55 | for that matter).
 56 | 
 57 | *Problem solved, right?* Well, mostly...
 58 | 
 59 | The biggest issue with such a simple approach is that the world 
 60 | hasn't yet settled on how a series is named, and possibly never will.
 61 | For example, a series like "Marvel's Agents of S.H.I.E.L.D." there
 62 | are a number of variations on that title that you'll see in the wild.
 63 | Variations like "Marvel's Agents of S.H.I.E.L.D. (2013)", or 
 64 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period),
 65 | all the way to "marvels.agents.of.shield".
 66 |  
 67 | If that isn't accounted for, then differently-named directories will
 68 | accumulate, containing episodes of the same series, often duplicates.
 69 | This only gets worse as the number of content sources increases.
 70 | 
 71 | Not good. This is the itch this tool scratches.
 72 | 
 73 | ## OK, but what does it *do?*
 74 | 
 75 | In a nutshell, it's a specialized string manipulation tool. You feed it
 76 | the name of a media file, it parses out the series, season, episode,
 77 | episode title, etc. from the name, and provides a 'template' system 
 78 | that allows you to easily reassemble a new name for the destination
 79 | from the parts it extracted from the source filename.
 80 | 
 81 | *That sounds like something you could do with `sed` or `awk`. So why
 82 | write this?*
 83 | 
 84 | The pattern matching is done in a loose/fuzzy way that would be 
 85 | impractical to do in bash script or command-line string manipulation
 86 | tools.
 87 | 
 88 | It uses a fundamentally different technique - character-mapped hashing
 89 | - than the usual simple character-by-character string comparison or
 90 | regular expression methods.
 91 |  
 92 | See "How does it work?" below, if you're curious about the details.
 93 | 
 94 | The 'template' describes the form of the string that this tool should
 95 | output. The component parts are substituted in the appropriate place
 96 | where you put something like "{episode}". There are quite a number of
 97 | these parameters:
 98 | 
 99 | | parameter name | description |
100 | |---             |---          |
101 | | {source}       | The path to the source file, as passed to this tool. |
102 | | {path}         | The 'dirname' part of the source (no trailing slash) |
103 | | {basename}     | The 'basename' of the source (without the extension) |
104 | | {extension}    | The extension. separate so that if what you want to do is convert containers, you can use something like {path}/{basename}.mkv |
105 | | {series}       | The raw name of the series (as extracted from the source) |
106 | | {season}       | Always at least two digits, zero-padded |
107 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise equivalent to "Season {season}" |
108 | | {episode}      | Always at least two digits, zero-padded |
109 | | {title}        | The episode title |
110 | | {destseries}   | This is the target folder that the tool determined, by a fuzzy match, is the right destination for the file.<br> More details below. |
111 | | {destination}  | The destination directory for the file. Also used as part of the fuzzy matching | 
112 | | {firstaired}   | the date this episode first aired *(specific to Channels DVR files)* |
113 | | {daterecorded} | the date/time Channels DVR recorded this *(specific to Channels DVR files)* |
114 | | {template}     | it's a parameter too, though you can't use it in a template |
115 | 
116 | This is only the predefined list of parameters that the parsing will
117 | pre-populate automatically - except for {destination} and {template},
118 | which need to be defined by the user. They can either be defined on the 
119 | command line, or in a config file - the tool looks for
120 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will
121 | process the config file defined by the `-c` command line option, before
122 | finishing with any command line options. Parameters can be defined
123 | multiple times; the last one wins. So you could, for example, define a
124 | default {title} as '(unknown)' in a config file, and it would be used
125 | if the file name parsing didn't find an episode title.
126 | 
127 | You may also define your own parameters in the config file, and use them
128 | in the template. And if the output-building code can't find a parameter
129 | name that matches in its dictionaries, it will also look for an
130 | environment variable with that name (case-sensitive, in this case). So
131 | {HOME} will be replaced by the path to the user's home directory (i.e.
132 |  the equivalent of '~')
133 | 
134 | The assumption is that a config file would contain at least the
135 | {destination} and {template} parameters, since those are likely to be
136 | consistent on a given machine. For example, `/etc/DVR2Plex.conf`
137 | might contain:
138 | ```
139 | destination = /home/video/TV
140 | template = "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title? @}{extension}"
141 | ```
142 | So assuming that the source file was 
143 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg` 
144 | and a directory existed called `/home/video/TV/Person of Interest (2011)`
145 | then that template would output:
146 |  
147 |  `"/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
148 |  
149 |  but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg`
150 |   would also create the same destination of 
151 |   `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
152 | 
153 | **Caution:** It's a good practice to include something in the template
154 | that is guaranteed to make the generated name unique, so that it won't
155 | overwrite an existing file in the destination (potentially a lower
156 | quality version). Since Channels DVR recordings have an .mpg extension,
157 | you'll probably be OK, but better safe than sorry.
158 | 
159 | ### Conditional Expansions
160 | *But wait, what on earth does {episode?E@:-} mean?*
161 | 
162 | When a parameter isn't defined, it expands to nothing. Which is all well
163 | and good, except if there's some surrounding characters that need to
164 | disappear too. {episode?E@:-} means 'if {episode} is defined, output 'E'
165 | followed by the contents of {episode}, otherwise output just '-'.
166 | Similarly {seasonfolder} would normally be seen in a template written
167 | as {seasonfolder?@/} so that the path separator is only included if 
168 | {seasonfolder} is defined.
169 | 
170 | This is akin to the ternary operator in C, if you're a programmer -
171 | up to the '?' is the thing to test, after the '?' and before the ':'
172 | or '}' is the string to output if it is defined ('true'), between the
173 | ':' and '}' is the string to output if it isn't defined ('false').
174 | Where an '@' appears, insert the value of the parameter.
175 | 
176 | ## How does it work?
177 | 
178 | The tool uses modified hashing to do comparisons. The hashing is
179 | modified by mapping each character through a mapping table first, so
180 | that particular characters can be mapped to another, or ignored
181 | completely. For example, upper case characters are mapped to lower case,
182 | so "UPPER" has the same hash as "upper" or "UpPeR"
183 | 
184 | DVR2Plex first builds up a list of hashes for the directories
185 | found in the {destination} directory.
186 | 
187 | The matching algorithm is not fazed by differing case, missing
188 | apostrophes, presence or absence of a year or country (e.g.
189 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in
190 | the destination).
191 | 
192 | This is particularly useful for the worst offenders. For example, if you
193 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)",
194 | the fuzzy matching can deal with something in the source like
195 | "marvels.agents.of.shield" and still put it in the correct folder.
196 | 
197 | This fixes all-lowercase series for example, or random "Of"/"of" confusion
198 | (or "MythBusters" vs. "Mythbusters")
199 | 
200 | Some characters are often dropped, like apostrophes or the trailing
201 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's"
202 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T."
203 | 
204 | All digits are mapped to '0' (though only for the pattern matching), so
205 | we have a constant hash for patterns like S02E10, S01E05, etc. This
206 | allows us to easily find the several patterns we're looking for, very
207 | efficiently. Those patterns are mostly season/episode patterns: SnnEnn,
208 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify
209 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR
210 | recordings, along with nnnn-nn-nn-nnnn for the date recorded.
211 | 
212 | There are in fact two character maps used for hashing. One for finding
213 | patterns, and another for looking up parameters. The main difference is
214 | that the parameter mapping doesn't map all digits to zero.
215 | 
216 | Internally, there are three 'dictionaries' used for searching, each is 
217 | a simple key-value list, using the hash as the key. The 'main'
218 | dictionary contains all the parameters that will remain the same for
219 | the entire run. This is the dictionary populated from the config files
220 | and command line options. There is a 'file' dictionary, which contains
221 | the parsed value for the last source parsed. This is discarded and
222 | rebuilt for each source, so per-file values don't carry over from one
223 | source to the next.
224 | 
225 | The third dictionary is the 'series' one, which is populated with hashes
226 | of the directory names that it finds by doing a scan of the {destination}
227 | directory. The assumption is that these are essentially the canonical
228 | names/destinations for the known TV series. For a given series directory,
229 | there may be more than one dictionary entry, as the hash without the
230 | year or country is included, as well as with it included. So the hash
231 | for "Person of Interest" is stored as well as "Person of Interest (2011)",
232 | both pointing to the target "Person of Interest (2011)" directory.
233 | Thus either form will match, and point to the right destination directory.
234 | This is the mechanism behind the {destseries} parameter.
235 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-architect


--------------------------------------------------------------------------------
/dvr2plex.c:
--------------------------------------------------------------------------------
   1 | /**
   2 |    Copyright &copy; Paul Chambers, 2019.
   3 | 
   4 |    @ToDo Switch to UTF-8 string handling, rather than relying on ASCII backwards-compatibility
   5 | */
   6 | 
   7 | #define _XOPEN_SOURCE 700
   8 | #include <features.h>
   9 | 
  10 | #include "dvr2plex.h"
  11 | #include <stdlib.h>
  12 | #include <stdio.h>
  13 | #include <errno.h>
  14 | #include <limits.h>
  15 | #include <sys/types.h>
  16 | #include <string.h>
  17 | #include <ctype.h>
  18 | #include <time.h>
  19 | #include <libgen.h> // for basename()
  20 | #include <pwd.h>
  21 | #define __USE_MISC  // dirent.d_type is linux-specific, apparently
  22 | #include <dirent.h>
  23 | #define __USE_GNU
  24 | #include <unistd.h>
  25 | #include <sys/stat.h>
  26 | 
  27 | #include <dlfcn.h>
  28 | 
  29 | #include "dictionary.h"
  30 | 
  31 | 
  32 | /*  hashes for patterns we are scanning for in the filename
  33 |     this hash table is used to generate hashes used to match patterns.
  34 |     it maps all digits to the same value, maps uppercase letters to
  35 |     lowercase, and ignores several characters completely.
  36 |  */
  37 | #include "patterns.h"
  38 | 
  39 | /*
  40 |    Hashes for the keywords/parameter names in the template. This hash table is also
  41 |    used for series names.
  42 | 
  43 |    Periods are ignored, because the trailing one is often omitted from series names like
  44 |    "S.W.A.T." and "Marvel's Agents of S.H.I.E.L.D.". By ignoring periods,
  45 |    "S.W.A.T.", "S.W.A.T" and "SWAT" will all result in the same hash value.
  46 | 
  47 |    Since periods may also be used as a separator, we have to treat ' ' and '_' as
  48 |    equivalent, or the hash for a space-separated name won't match the hash of a
  49 |    period- or underscore-separated one.
  50 | 
  51 |    In other words, ' ', '_' and '.' do not contribute to the series hash. Similarly,
  52 |    apostrophes are also often omitted ("Marvel's" becomes "Marvels"), so it is
  53 |    similarly ignored when generating a hash, along with '?' (e.g. "Whose Line Is It
  54 |    Anyway?") and '!' ("I'm a Celebrity...Get Me Out of Here!").
  55 | 
  56 |    "Marvel's Agents of S.H.I.E.L.D. (2017)" is perhaps one of the most difficult
  57 |    matching examples I've seen in the wild. There are so many ways to mangle that.
  58 | 
  59 |    ':' is usually converted to '-' or omitted entirely, so ignore those, too.
  60 | 
  61 |    Left and right brackets are also mapped to be equivalent, e.g. [2017] has the
  62 |    same hash as (2017).
  63 |  */
  64 | #include "keywords.h"
  65 | 
// NOTE(review): not referenced in the code visible here - presumably the program name; confirm.
string gMyName;
// NOTE(review): not referenced directly in the code visible here - presumably the threshold used by debugf(); confirm.
int gDebugLevel = 0;
// Upper bound for a bracketed year accepted by storeToken() (1890 < year <= gNextYear).
// NOTE(review): the initial value looks like a placeholder that is updated elsewhere - confirm.
unsigned int gNextYear = 1895;

// Parameter dictionaries. findParam() searches gFileDict first, then gPathDict, then gMainDict.
tDictionary * gMainDict;
tDictionary * gPathDict;
// Receives the values parsed from the current source path (see parsePath(), storeToken()).
tDictionary * gFileDict;
// Filled by addSeries(), queried by storeSeries() to map a parsed name onto a known series.
tDictionary * gSeriesDict;

// NOTE(review): neither cache variable is referenced in the code visible here.
string gCachedPath   = NULL;
string gCachedSeries = NULL;
  77 | 
/* One token carved out of a file name by tokenizeName(); tokens form a singly-linked list. */
typedef struct sToken
{
	struct sToken * next;       // following token, or NULL at the end of the list
	string          start;      // first character of the token, inside tokenizeName()'s working copy
	string          end;        // position of the terminator that tokenizeName() wrote after the token
	tHash           hash;       // checkHash() result for the token; the merge passes may overwrite it
	unsigned char   seperator;  // the character that originally stood at 'end'
} tToken;

/* List head: only 'next' is used by the code visible here; real tokens hang off it. */
tToken gTokenList;
  88 | 
  89 | /**
  90 |  * trim any trailing whitespace from the end of the string
  91 |  *
  92 |  * @param line	line to be trimmed
  93 |  */
  94 | void trimTrailingWhitespace(char * line)
  95 | {
  96 |     char * t    = line;
  97 |     char * nwsp = line;
  98 | 
  99 |     if ( t != NULL )
 100 |     {
 101 |         while (*t != '\0')
 102 |         {
 103 |             if (!isspace(*t++))
 104 |             {
 105 |                 // note: t has already been incremented
 106 |                 nwsp = t;
 107 |             }
 108 |         }
 109 |         *nwsp = '\0';
 110 |     }
 111 | }
 112 | 
 113 | string lookupHash(tHash hash)
 114 | {
 115 | 	tKeywordHashMapping * keywordMap = KeywordHashLookup;
 116 | 
 117 | 	while ( keywordMap->key != 0 )
 118 | 	{
 119 | 		if ( hash == keywordMap->key )
 120 | 		{
 121 | 			return keywordMap->label;
 122 | 		}
 123 | 		keywordMap++;
 124 | 	}
 125 | 
 126 | 	tPatternHashMapping * patternMap = PatternHashLookup;
 127 | 
 128 | 	while ( patternMap->key != 0 )
 129 | 	{
 130 | 		if ( hash == patternMap->key )
 131 | 		{
 132 | 			return patternMap->label;
 133 | 		}
 134 | 		patternMap++;
 135 | 	}
 136 | 
 137 | 	return "<unknown>";
 138 | }
 139 | 
 140 | /**
 141 |  * @brief look in the three dictionaries for the first occurance of a hash value
 142 |  * @param hash
 143 |  * @return
 144 |  */
 145 | string findParam( tHash hash )
 146 | {
 147 | 	string result;
 148 | 
 149 | 	result = findValue( gFileDict, hash );
 150 | 	if ( result == NULL )
 151 | 	{
 152 | 		result = findValue( gPathDict, hash );
 153 | 	}
 154 | 	if ( result == NULL )
 155 | 	{
 156 | 		result = findValue( gMainDict, hash );
 157 | 	}
 158 | 	return result;
 159 | }
 160 | 
 161 | /**
 162 |    Hashes the 'series' using the 'keyword' hash table, since comparing series names needs
 163 |    slightly different logic than scanning for patterns. Separators (spaces, periods,
 164 |    underscores) are ignored completely. As are \', !, amd ?, since those are frequently
 165 |    omitted. Upper case letters are mapped to lower case since those are also very
 166 |    inconsistent (no UTF-8 handling yet, though). and '&' is expanded to 'and' in the
 167 |    hash, so both forms will hash to the same value.
 168 | 
 169 |    Since a series name may or may not be suffixed by a year or country surrounded
 170 |    by brackets (e.g. (2019) or (US)). So a hash is added whenever a left bracket
 171 |    is encountered, so the hash for 'Some Series' and 'Some Series (2019)' are both
 172 |    stored in the series dictionary, so there will be a hash available to match
 173 |    either with or without the suffix.
 174 |  */
 175 | void addSeries( string series )
 176 | {
 177 |     tHash result = 0;
 178 |     unsigned char * s = (unsigned char *)series;
 179 |     unsigned char   c;
 180 | 
 181 |     do {
 182 |         c = kKeywordMap[ *s++ ];
 183 |         switch ( c )
 184 |         {
 185 |             // we hash the '&' character as if 'and' was used. so both forms generate the same hash
 186 |             // e.g. the hash of 'Will & Grace' will match the hash of 'Will and Grace'
 187 |         case '&':
 188 |             result = fKeywordHashChar( result, 'a' );
 189 |             result = fKeywordHashChar( result, 'n' );
 190 |             result = fKeywordHashChar( result, 'd' );
 191 |             break;
 192 | 
 193 |         case kKeywordLBracket:
 194 |             // we found something bracketed, e.g. (uk) or (2019), so we also add the
 195 |             // intermediate hash to the dictionary, before we hash the bracketed content.
 196 |             // Then if we hash the same series with the year omitted, for example, will
 197 |             // still match something. Though we can't do much about a file that omits a
 198 |             // a year or country, e.g. 'MacGyver' instead of 'MacGyver (2016)', or
 199 |             // 'Hell's Kitchen' instead of 'Hell's Kitchen (US)'
 200 |             //
 201 |             // Note: if there are multiple left brackets encountered, there will be
 202 |             // multiple intermediate hashes added.
 203 | 
 204 |             addParam( gSeriesDict, result, series );
 205 |             result = fKeywordHashChar( result, c );
 206 |             break;
 207 | 
 208 |         case '\0':
 209 |         case kKeywordSeparator:
 210 |         case kKeywordIgnored:
 211 |             break;
 212 | 
 213 |         default:
 214 |             result = fKeywordHashChar( result, c );
 215 |             break;
 216 |         }
 217 |     } while ( c != '\0' );
 218 | 
 219 |     // also add the hash of the full string, including any trailing bracketed stuff
 220 |     addParam( gSeriesDict, result, series );
 221 | }
 222 | 
 223 | static int scanDirFilter( const struct dirent * entry)
 224 | {
 225 |     int result = 0;
 226 | 
 227 |     result = ( entry->d_name[0] != '.' && entry->d_type == DT_DIR );
 228 | 
 229 |     // debugf( 3, "%s, 0x%x, %d\n", entry->d_name, entry->d_type, result );
 230 |     return result;
 231 | }
 232 | 
 233 | int buildSeriesDictionary( string path )
 234 | {
 235 |     struct dirent **namelist;
 236 |     int n;
 237 | 
 238 |     n = scandir( path, &namelist, scanDirFilter, alphasort);
 239 |     if ( n < 0 ) {
 240 |         perror("scandir");
 241 |         return n;
 242 |     }
 243 | 
 244 |     for ( int i = 0; i < n; ++i )
 245 |     {
 246 |         addSeries( namelist[ i ]->d_name );
 247 |         free( namelist[ i ] );
 248 |     }
 249 |     free(namelist);
 250 | 
 251 |     /* printDictionary( dictionary ); */
 252 | 
 253 |     return 0;
 254 | }
 255 | 
 256 | void addSeasonEpisode( unsigned int season, unsigned int episode )
 257 | {
 258 |     char  temp[50];
 259 | 
 260 |     snprintf( temp, sizeof(temp), "%02u", season );
 261 |     addParam( gFileDict, kKeywordSeason, temp );
 262 |     if ( season == 0 || episode == 0 )
 263 |     {
 264 | 	    addParam( gFileDict, kKeywordSeasonFolder, "Specials" );
 265 |     }
 266 |     else
 267 |     {
 268 |         snprintf( temp, sizeof(temp), "Season %02u", season );
 269 | 	    addParam( gFileDict, kKeywordSeasonFolder, temp );
 270 |     }
 271 | 
 272 | 	snprintf( temp, sizeof(temp), "%02u", episode );
 273 |     addParam( gFileDict, kKeywordEpisode, temp );
 274 | }
 275 | 
 276 | void storeSeries( string series )
 277 | {
 278 |     string result = series;
 279 |     string ptr, end;
 280 |     tHash hash;
 281 |     unsigned char c;
 282 | 
 283 |     ptr  = series;
 284 |     hash = 0;
 285 | 
 286 |     addParam( gFileDict, kKeywordSeries, series );
 287 | 
 288 |     // regenerate the hash incrementally, checking at each separator.
 289 |     // remember the longest match, i.e. keep looking until the end of the string
 290 |     do {
 291 |         c = kKeywordMap[ (unsigned char)*ptr ];
 292 |         switch ( c )
 293 |         {
 294 |         case kKeywordSeparator:
 295 |         case '\0':
 296 |         	/* let's see if we have a match */
 297 |             debugf( 4, "checking: 0x%016lx\n", hash );
 298 | 
 299 |             string match = findValue( gSeriesDict, hash );
 300 |             if ( match != NULL)
 301 |             {
 302 |                 result = match;
 303 |                 debugf( 3, "matched %s\n", result );
 304 |                 end = ptr;
 305 |             }
 306 |             break;
 307 | 
 308 |         case '&':
 309 |             hash = fPatternHashChar( hash, 'a' );
 310 |             hash = fPatternHashChar( hash, 'n' );
 311 |             hash = fPatternHashChar( hash, 'd' );
 312 |             break;
 313 | 
 314 |         default:
 315 |             hash = fPatternHashChar( hash, c );
 316 |             break;
 317 |         };
 318 |         ptr++;
 319 |     } while ( c != '\0' );
 320 | 
 321 |     if ( result != series )
 322 |     {
 323 |         if ( *end != '\0' )
 324 |         {
 325 |             /* if the run is longer than the match with the series name,
 326 |                then store the trailing remnant as the episode title */
 327 |             addParam( gFileDict, kKeywordTitle, (string) end + 1 );
 328 | 	        *(char *) end = '\0';
 329 |         }
 330 |     }
 331 | 	addParam( gFileDict, kKeywordDestSeries, result );
 332 | }
 333 | 
 334 | int storeToken( tHash hash, string value )
 335 | {
 336 |     unsigned int season  = 0;
 337 |     unsigned int episode = 0;
 338 |     unsigned int year    = 0;
 339 |     char temp[20];
 340 |     string seriesName;
 341 | 
 342 |     switch (hash)
 343 |     {
 344 |     case kPatternSnnEnn:   // we found 'SnnEnn' or
 345 |     case kPatternSyyyyEnn: // SyyyyEnnn
 346 |     case kPatternSnnEn:    // SnnEn
 347 |     case kPatternSnEnn:    // SnEnn
 348 |     case kPatternSnEn:     // SnEn
 349 |         debugf( 3,"SnnEnn: %s\n", value);
 350 |         sscanf( value, "%*1c%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case
 351 |         addSeasonEpisode( season, episode );
 352 |         break;
 353 | 
 354 |     case kPatternEnnn:
 355 |         debugf( 3,"Ennn: %s\n", value);
 356 |         sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case
 357 |         season = episode / 100;
 358 |         episode %= 100;
 359 |         addSeasonEpisode( season, episode );
 360 |         break;
 361 | 
 362 |     case kPatternEnnnn:
 363 |         debugf( 3,"Ennnn: %s\n", value);
 364 |         sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case
 365 |         unsigned int divisor = 100;
 366 |         /* see if there's a season number to extract */
 367 |         if ( ((episode / divisor) % 10) == 0 )
 368 |         {
 369 |             /* least significant digit of season is zero, so we can increase the divisor by 10 */
 370 |             divisor *= 10;
 371 |         }
 372 |         season = episode / divisor;
 373 |         episode %= divisor;
 374 |         addSeasonEpisode( season, episode );
 375 |         break;
 376 | 
 377 |     case kPatternnXnn:
 378 |     case kPatternnnXnn:
 379 |         debugf( 3, "nnXnn: %s\n", value);
 380 |         sscanf( value, "%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case
 381 |         addSeasonEpisode( season, episode );
 382 |         break;
 383 | 
 384 |     case kPatternYear:
 385 | 	    sscanf( value, "%*1c%u%*1c", &year ); // ignore characters since we don't know their case
 386 |         if ( 1890 < year && year <= gNextYear )
 387 |         {
 388 |             snprintf( temp, sizeof( temp ), "%u", year );
 389 |             addParam( gFileDict, kKeywordYear, temp );
 390 |         }
 391 | 	    debugf( 3, "year: %u\n", year );
 392 | 	    break;
 393 | 
 394 |     case kPatternCountryUSA:
 395 |     	addParam( gFileDict, kKeywordCountry, "USA" );
 396 |     	break;
 397 | 
 398 |     case kPatternCountryUS:
 399 | 	    addParam( gFileDict, kKeywordCountry, "US" );
 400 | 	    break;
 401 | 
 402 |     case kPatternCountryUK:
 403 | 	    addParam( gFileDict, kKeywordCountry, "UK" );
 404 | 	    break;
 405 | 
 406 |     case kPatternNoMatch:
 407 |         seriesName = findParam( kKeywordSeries );
 408 |         if ( seriesName == NULL )
 409 |         {
 410 |             debugf( 3, "series: %s\n", value );
 411 |             storeSeries( value );
 412 |         }
 413 |         else
 414 |         {
 415 |             debugf( 3, "title: %s\n", value );
 416 |             addParam( gFileDict, kKeywordTitle, value );
 417 |         }
 418 |         break;
 419 | 
 420 |     // kPatternTwoDigits:
 421 |     // kPatternFourDigits:
 422 |     // kPatternSixDigits:
 423 |     // kPatternEightDigits:
 424 |     default:
 425 |         break;
 426 |     }
 427 |     return 0;
 428 | }
 429 | 
 430 | tHash checkHash( tHash hash)
 431 | {
 432 |     switch (hash)
 433 |     {
 434 |     case kPatternSnnEnn:       // SnnEnn
 435 |     case kPatternSyyyyEnn:     // SnnnnEnn
 436 |     case kPatternSnnEn:        // SnnEn
 437 |     case kPatternSnEnn:        // SnEnn
 438 |     case kPatternSnEn:         // SnEn
 439 |     case kPatternEnnn:         // Ennn
 440 |     case kPatternEnnnn:        // Ennnn
 441 |     case kPatternnXnn:         // nXnn
 442 |     case kPatternnnXnn:        // nnXnn
 443 |     case kPatternTwoDigits:    // nn
 444 |     case kPatternFourDigits:   // nnnn
 445 |     case kPatternSixDigits:    // nnnnnn
 446 |     case kPatternEightDigits:  // nnnnnnnn
 447 |     case kPatternCountryUSA:   // (USA)
 448 |     case kPatternCountryUS:    // (US)
 449 |     case kPatternCountryUK:    // (UK)
 450 |     case kPatternYear:         // (nnnn)
 451 |     	break;
 452 | 
 453 |     default:
 454 |         hash = kPatternNoMatch;
 455 |         break;
 456 |     }
 457 |     return hash;
 458 | }
 459 | 
 460 | void tokenizeName( string originalName )
 461 | {
 462 | 	gTokenList.next = NULL;
 463 | 
 464 | 	string name = strdup( originalName ); // copy it, because we'll terminate strings in place as we go
 465 | 
 466 | 	if ( name != NULL)
 467 | 	{
 468 | 		unsigned char c;
 469 | 
 470 | 		string start = name;
 471 | 		string ptr   = start;
 472 | 		tHash  hash  = 0;
 473 | 
 474 | 		tToken * token = &gTokenList;
 475 | 
 476 | 		do {
 477 | 			c = kPatternMap[ *(unsigned char *)ptr ];
 478 | 			switch ( c )
 479 | 			{
 480 | 			case kPatternSeperator:
 481 | 			case '\0':
 482 | 				// reached the end of a token
 483 | 				token->next = calloc( 1, sizeof(tToken) );
 484 | 				token = token->next;
 485 | 				if ( token != NULL )
 486 | 				{
 487 | 					token->hash      = checkHash( hash );
 488 | 					token->start     = start;
 489 | 					token->end       = ptr;
 490 | 					token->seperator = *ptr;
 491 | 					*(char *)ptr = '\0';
 492 | 				}
 493 | 				// only prepare for the next run if we're not at the end of the string
 494 | 				if ( c != '\0' )
 495 | 				{
 496 | 					// skip over a run of kPatternSeperator, if present (e.g. ' - ')
 497 | 					do { ptr++; } while ( kPatternMap[ *(unsigned char *)ptr ] == kPatternSeperator );
 498 | 					start = ptr;
 499 | 					hash = 0;
 500 | 				}
 501 | 				break;
 502 | 
 503 | 			case '&':
 504 | 				hash = fPatternHashChar( hash, 'a' );
 505 | 				hash = fPatternHashChar( hash, 'n' );
 506 | 				hash = fPatternHashChar( hash, 'd' );
 507 | 				ptr++;
 508 | 				break;
 509 | 
 510 |             default:
 511 | 				hash = fPatternHashChar( hash, c );
 512 |                 ptr++;
 513 | 				break;
 514 | 			};
 515 | 		} while ( c != '\0' );
 516 | 
 517 | 		token = gTokenList.next;
 518 | 		while ( token != NULL )
 519 | 		{
 520 | 			debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator );
 521 | 			token = token->next;
 522 | 		}
 523 | 	}
 524 | }
 525 | 
 526 | void freeTokenList( void )
 527 | {
 528 |     tToken *nextToken;
 529 |     tToken * token = gTokenList.next;
 530 |     gTokenList.next = NULL;
 531 |     while ( token != NULL )
 532 |     {
 533 |         nextToken = token->next;
 534 |         free( token );
 535 |         token = nextToken;
 536 |     }
 537 | }
 538 | 
 539 | /*
 540 |  * Channels DVR:
 541 |  *   air date: yyyy-mm-dd
 542 |  *   recorded: yyyy-mm-dd-hhss
 543 |  * TVMosaic
 544 |  *   recorded: hhss-yyyymmdd
 545 |  *
 546 |  */
 547 | void mergeDigits( void )
 548 | {
 549 |     tToken * token[4];
 550 | 
 551 |     token[0] = gTokenList.next;
 552 | 
 553 |     while ( token[0] != NULL)
 554 |     {
 555 |         token[1] = token[0]->next;
 556 |         switch ( token[0]->hash )
 557 |         {
 558 |             // Channels DVR: YYYY-mm-dd
 559 |             //               YYYY-mm-dd-hhss
 560 |             //     TVMosaic: HHSS-yyyymmdd
 561 |         case kPatternFourDigits:
 562 |             if ( token[1] != NULL)
 563 |             {
 564 | 	            token[2] = token[1]->next;
 565 | 
 566 | 	            switch ( token[1]->hash )
 567 |                 {
 568 |                     // Channels DVR: YYYY-MM-dd
 569 |                     //               YYYY-MM-dd-hhss
 570 |                 case kPatternTwoDigits:
 571 |                     if ( token[1]->seperator == '-' && token[2] != NULL)
 572 |                     {
 573 |                         switch ( token[2]->hash )
 574 |                         {
 575 |                             // Channels DVR: YYYY-MM-DD
 576 |                             //               YYYY-MM-DD-hhss
 577 |                         case kPatternTwoDigits:
 578 |                             token[3] = token[2]->next;
 579 |                             if (token[3] != NULL)
 580 |                             {
 581 |                                 switch ( token[3]->hash )
 582 |                                 {
 583 |                                 case kPatternFourDigits:
 584 |                                     // ok, looks like we have YYYY-MM-DD-HHSS
 585 |                                     token[0]->next = token[3]->next;
 586 |                                     *(char *) token[0]->end = '-';
 587 |                                     *(char *) token[1]->end = '-';
 588 |                                     *(char *) token[2]->end = '-';
 589 |                                     token[0]->end  = token[3]->end;
 590 |                                     token[0]->hash = kKeywordDateRecorded;
 591 |                                     free( token[1] );
 592 |                                     free( token[2] );
 593 |                                     free( token[3] );
 594 |                                     break;
 595 | 
 596 |                                 default:
 597 |                                     // ok, looks like we have YYYY-MM-DD
 598 |                                     token[0]->next = token[2]->next;
 599 |                                     *(char *) token[0]->end = '-';
 600 |                                     *(char *) token[1]->end = '-';
 601 |                                     token[0]->end  = token[2]->end;
 602 |                                     token[0]->hash = kKeywordFirstAired;
 603 |                                     free( token[1] );
 604 |                                     free( token[2] );
 605 |                                     break;
 606 |                                 }
 607 |                             }
 608 |                             break;
 609 | 
 610 |                         default:
 611 |                             break;
 612 |                         }
 613 |                     }
 614 |                     break;
 615 | 
 616 | 	                // TVMosaic: HHSS-YYYYMMDD
 617 |                 case kPatternEightDigits:
 618 | 	                token[0]->next = token[1]->next;
 619 | 	                *(char *) token[0]->end = '-';
 620 | 	                token[0]->end  = token[1]->end;
 621 | 	                token[0]->hash = kKeywordDateRecorded;
 622 | 	                free( token[1] );
 623 | 	                break;
 624 | 
 625 |                 default:
 626 |                     token[0]->hash = kPatternNoMatch;
 627 |                     break;
 628 |                 }
 629 |             }
 630 |             else
 631 |             {
 632 |                 // last token, therefore four trailing digits, no metapattern
 633 |                 token[0]->hash = kPatternNoMatch;
 634 |             }
 635 |             break;
 636 | 
 637 |         default:
 638 |             // not kPatternFourDigits, ignore it.
 639 |             break;
 640 |         }
 641 |         token[0] = token[0]->next;
 642 |     }
 643 | }
 644 | 
 645 | void mergeNoMatch( void )
 646 | {
 647 |     tToken * token;
 648 |     tToken * nextToken;
 649 | 
 650 | 	token = gTokenList.next;
 651 |     while ( token != NULL)
 652 |     {
 653 |         nextToken = token->next;
 654 |         if ( nextToken != NULL && token->hash == kPatternNoMatch && nextToken->hash == kPatternNoMatch )
 655 |         {
 656 |             // combine the two kPatternNoMatch tokens
 657 |             token->next = nextToken->next;
 658 |             *(char *)token->end = ' ';
 659 |             token->end = nextToken->end;
 660 |             free( nextToken );
 661 |         }
 662 |         else
 663 |         {
 664 |             token = token->next;
 665 |         }
 666 |     }
 667 | 
 668 | 	/* Some tokens should also be appended as a suffix, while also retaining the token */
 669 | 	token = gTokenList.next;
 670 | 	while ( token != NULL)
 671 | 	{
 672 | 		nextToken = token->next;
 673 | 		if ( token->hash == kPatternNoMatch && nextToken != NULL )
 674 | 		{
 675 | 			switch ( nextToken->hash )
 676 | 			{
 677 | 				/* The tokens we treat as suffixes */
 678 | 			case kPatternCountryUK:
 679 | 			case kPatternCountryUS:
 680 | 			case kPatternCountryUSA:
 681 | 			case kPatternYear:
 682 | 				/* extend the kPatternNoMatch token to include the suffix */
 683 | 				*(char *)token->end = ' ';
 684 | 				token->end = nextToken->end;
 685 | 				break;
 686 | 
 687 | 			default:
 688 | 				break;
 689 | 			}
 690 | 		}
 691 | 		token = token->next;
 692 | 	}
 693 | }
 694 | 
 695 | int parseName( string name )
 696 | {
 697 |     tToken * token = gTokenList.next;
 698 | 
 699 |     tokenizeName( name );
 700 | 
 701 |     mergeDigits();
 702 |     mergeNoMatch();
 703 | 
 704 |     debugf( 4, "%s\n", "after merging" );
 705 |     token = gTokenList.next;
 706 |     while ( token != NULL)
 707 |     {
 708 |         debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator );
 709 | 
 710 | 	    storeToken( token->hash, token->start );
 711 | 	    token = token->next;
 712 |     }
 713 | 
 714 |     freeTokenList();
 715 | 
 716 |     return 0;
 717 | }
 718 | 
 719 | 
 720 | /*
 721 |  * carve up the path into directory path, basename and extension
 722 |  * then pass basename onto parseName() to be processed
 723 |  */
 724 | int parsePath( string path )
 725 | {
 726 |     int result = 0;
 727 | 
 728 |     addParam( gFileDict, kKeywordSource, path );
 729 | 
 730 |     string lastPeriod = strrchr( path, '.' );
 731 |     string lastChar = path + strlen(path);
 732 |     if ( lastPeriod != NULL && (lastChar - lastPeriod) < 5 )
 733 |     {
 734 |         addParam( gFileDict, kKeywordExtension, lastPeriod );
 735 |     }
 736 |     else
 737 |     {
 738 |         lastPeriod = lastChar;
 739 |     }
 740 | 
 741 |     string lastSlash = strrchr( path, '/' );
 742 |     if ( lastSlash != NULL )
 743 |     {
 744 |         string p = strndup( path, lastSlash - path );
 745 |         addParam( gFileDict, kKeywordPath, p );
 746 |         free( (void *)p );
 747 | 
 748 |         ++lastSlash;
 749 |     }
 750 |     else
 751 |     {
 752 |         lastSlash = path; // no directories prefixed
 753 |     }
 754 | 
 755 |     string basename = strndup( lastSlash, lastPeriod - lastSlash );
 756 |     addParam( gFileDict, kKeywordBasename, basename );
 757 |     parseName( basename );
 758 |     free( (void *)basename );
 759 | 
 760 |     return result;
 761 | }
 762 | 
 763 | string buildString( string template )
 764 | {
 765 |     string result = NULL;
 766 |     string t = template;
 767 |     char * s;    // pointer into the returned string
 768 | 
 769 |     result = calloc( 1, 32768 );
 770 |     s = (char *)result;
 771 | 
 772 |     if ( s != NULL )
 773 |     {
 774 |         unsigned char c = *t++; // unsigned because it is used as an array subscript when calculating the hash
 775 |         while ( c != '\0' )
 776 |         {
 777 |             unsigned long hash;
 778 |             string k;
 779 | 
 780 |             switch (c)
 781 |             {
 782 |             case '{':   // start of keyword
 783 |                 k = t; // remember where the keyword starts
 784 | 
 785 |                 // scan the keyword and generate its hash
 786 |                 hash = 0;
 787 | 
 788 | 	            c = *t++;
 789 | 	            while ( c != '\0' && c != '}' && c != '?' )
 790 |                 {
 791 |                     if ( kKeywordMap[ c ] != kKeywordSeparator ) /* we ignore some characters when calculating the hash */
 792 |                     {
 793 |                         hash = fKeywordHashChar( hash, c );
 794 |                     }
 795 |                     c = *t++;
 796 |                 }
 797 | 
 798 |                 if ( hash != kKeywordTemplate ) // don't want to expand a {template} keyword in a template!
 799 |                 {
 800 |                     string value = findParam( hash );
 801 | 
 802 |                     if ( value == NULL ) // not in the dictionaries, check for an environment variable
 803 |                     {
 804 |                         string envkey = strndup( k, t - k - 1 );
 805 |                         value = getenv( envkey );
 806 |                         if ( value != NULL )
 807 |                         {
 808 |                             debugf( 3, "env=\"%s\", value=\"%s\"\n", envkey, value );
 809 |                         }
 810 |                         free( (void *)envkey );
 811 |                     }
 812 | 
 813 |                     if ( c != '?' )
 814 |                     {
 815 |                         // end of keyword, and not the beginning of a ternary expression
 816 |                         if ( value != NULL )
 817 |                         {
 818 |                             s = stpcpy( s, value );
 819 |                         }
 820 |                     }
 821 |                     else
 822 |                     {  // ternary operator, like {param?true:false} (true or false can be absent)
 823 | 
 824 |                         c = *t++;
 825 | 
 826 |                         if ( value != NULL )
 827 |                         {
 828 |                             // copy the 'true' clause
 829 |                             while ( c != '}' && c != ':' && c != '\0' )
 830 |                             {
 831 |                                 if ( c != '@' )
 832 |                                 {
 833 |                                     *s++ = c;
 834 |                                 }
 835 |                                 else
 836 |                                 {
 837 |                                     s = stpcpy( s, value );
 838 |                                 }
 839 | 
 840 |                                 c = *t++;
 841 |                             }
 842 | 
 843 |                             if ( c == ':' )
 844 |                             {
 845 |                                 // skip over the 'false' clause
 846 |                                 while ( c != '\0' && c != '}' )
 847 |                                 {
 848 |                                     c = *t++;
 849 |                                 }
 850 |                             }
 851 |                         }
 852 |                         else // if undefined, skip over 'true' pattern, find the ':' (or trailing '}')
 853 |                         {
 854 |                             // value is undefined, so skip ahead to the false clause (or keyword end)
 855 |                             while ( c != ':' && c != '}' && c != '\0' )
 856 |                             {
 857 |                                 c = *t++;
 858 |                             }
 859 | 
 860 |                             if ( c == ':' ) // did we find the 'false' clause?
 861 |                             {
 862 |                                 c = *t++;  // yep, so swallow the colon
 863 |                                 // copy the 'false' clause into the string
 864 |                                 // no '@' processing, as the parameter is not defined
 865 |                                 while ( c != '\0' && c != '}' )
 866 |                                 {
 867 |                                     *s++ = c;
 868 |                                     c = *t++;
 869 |                                 }
 870 |                             }
 871 |                         }
 872 |                     }
 873 |                 } // if !{template}
 874 |                 break;
 875 | 
 876 |             case '\\': // next template character is escaped, not interpreted, e.g. \{
 877 |                 c = *t++;
 878 |                 *s++ = c;
 879 |                 break;
 880 | 
 881 |             default:
 882 |                 *s++ = c;
 883 |                 break;
 884 |             } // switch
 885 | 
 886 |             c = *t++;
 887 |         }
 888 | 
 889 |         *s = '\0'; // always terminate the string
 890 |     }
 891 |     return result;
 892 | }
 893 | 
 894 | int parseConfigFile( tDictionary * dictionary, string path )
 895 | {
 896 |     int    result = 0;
 897 |     FILE * file;
 898 |     char   buffer[ 4096 ]; // 4K seems like plenty
 899 | 
 900 |     if ( eaccess( path, R_OK ) != 0 )   // only attempt to parse it if there's something accessible there
 901 |     {
 902 | 	    // it's OK if the file is missing, otherwise complain
 903 | 	    if ( errno != ENOENT )
 904 | 	    {
 905 | 		    fprintf( stderr,
 906 | 		             "### Error: Unable to access config file \'%s\' (%d: %s)",
 907 | 		             path, errno, strerror(errno));
 908 | 		    result = errno;
 909 | 	    }
 910 |     }
 911 | 	else
 912 | 	{
 913 | 	    debugf( 3, "config file: \'%s\'\n", path );
 914 | 
 915 | 	    file = fopen(path, "r");
 916 |         if (file == NULL)
 917 |         {
 918 |             fprintf( stderr, "### Error: Unable to open config file \'%s\' (%d: %s)\n",
 919 |                      path, errno, strerror(errno) );
 920 |             result = errno;
 921 |         }
 922 |         else
 923 |         {
 924 |             while ( fgets( buffer, sizeof( buffer ), file) != NULL )
 925 |             {
 926 |                 trimTrailingWhitespace( buffer );
 927 |                 debugf( 4,"line: \'%s\'\n", buffer );
 928 | 
 929 |                 tHash hash = 0;
 930 |                 string s = buffer;
 931 |                 while (isspace(*s)) {
 932 |                     s++;
 933 |                 }
 934 | 
 935 |                 unsigned char c = (unsigned char) *s++;
 936 |                 if (c != '\0') {
 937 |                     while (c != '\0' && c != '=') {
 938 |                         if ( c != kKeywordSeparator ) {
 939 |                             hash = fKeywordHashChar( hash, c );
 940 |                         }
 941 |                         c = (unsigned char) *s++;
 942 |                     }
 943 | 
 944 |                     if (c == '=') {
 945 |                         // skip over whitespace from the beginning of the value
 946 |                         while ( isspace(*s) ) {
 947 |                             s++;
 948 |                         }
 949 | 	                    trimTrailingWhitespace( (char *)s );
 950 |                     }
 951 |                     debugf( 4,"hash = 0x%016lx, value = \'%s\'\n", hash, s);
 952 |                     addParam( dictionary, hash, s );
 953 |                 }
 954 |             }
 955 |             fclose(file);
 956 |         }
 957 |     }
 958 | 
 959 |     return result;
 960 | }
 961 | 
 962 | /**
 963 |  * @brief Look for config files to process, and use them to update the main dictionary.
 964 |  *
 965 |  * First, look in /etc/<argv[0]>.conf then in ~/.config/<argv[0]>.conf, and finally the file
 966 |  * passed as a -c parameter, if any, then any parameters on the command line (except -c)
 967 |  * Where a parameter occurs more than once in a dictionary, the most recent definition 'wins'
 968 |  */
 969 | 
 970 | int parseConfig( string path )
 971 | {
 972 |     int  result = 0;
 973 |     char temp[PATH_MAX];
 974 | 
 975 |     snprintf( temp, sizeof( temp ), "/etc/%s.conf", gMyName );
 976 |     debugf( 4, "/etc path: \"%s\"\n", temp );
 977 | 
 978 |     result = parseConfigFile( gMainDict, temp );
 979 | 
 980 |     if ( result == 0 )
 981 |     {
 982 |         string home = getenv("HOME");
 983 |         if ( home == NULL)
 984 |         {
 985 |             home = getpwuid( getuid() )->pw_dir;
 986 |         }
 987 |         if ( home != NULL )
 988 |         {
 989 |             snprintf( temp, sizeof( temp ), "%s/.config/%s.conf", home, gMyName );
 990 |             debugf( 4, "~ path: \"%s\"\n", temp );
 991 | 
 992 |             result = parseConfigFile( gMainDict, temp );
 993 |         }
 994 |     }
 995 | 
 996 |     if ( result == 0 && path != NULL )
 997 |     {
 998 |     	struct stat fileStat;
 999 | 
1000 |     	if ( stat( path, &fileStat ) != 0 )
1001 | 	    {
1002 | 		    fprintf( stderr, "### Error: config path '%s' is not valid (%d: %s)\n",
1003 | 		    		 path, errno, strerror(errno) );
1004 | 		    result = -1;
1005 | 	    }
1006 | 	    switch ( fileStat.st_mode & S_IFMT )
1007 | 	    {
1008 | 	    case S_IFDIR:
1009 | 		    snprintf( temp, sizeof( temp ), "%s/%s.conf", path, gMyName );
1010 | 		    break;
1011 | 
1012 | 	    case S_IFLNK:
1013 | 	    case S_IFREG:
1014 | 		    strncpy( temp, path, sizeof( temp ) );
1015 | 		    break;
1016 | 
1017 | 	    default:
1018 | 		    fprintf( stderr, "### Error: config path '%s' is neither a file nor directory.\n", path );
1019 | 		    result = -1;
1020 | 		    break;
1021 | 	    }
1022 | 
1023 |     	if ( result == 0 )
1024 | 	    {
1025 | 		    debugf( 4, "-c path: %s\n", temp );
1026 | 		    result = parseConfigFile( gMainDict, temp );
1027 | 	    }
1028 |     }
1029 | 
1030 |     return result;
1031 | }
1032 | 
1033 | /**
1034 |  * @brief recurive function to walk the path looking for config files
1035 |  * @param gFileDict
1036 |  * @param path
1037 |  */
1038 | void _recurseConfig( tDictionary * dictionary, string path )
1039 | {
1040 | 	char temp[PATH_MAX];
1041 | 
1042 | 	if ( strlen(path) != 1 || (path[0] != '/' && path[0] != '.'))
1043 | 	{
1044 | 		strncpy( temp, path, sizeof( temp ) );
1045 | 		_recurseConfig( dictionary, dirname( temp ) );
1046 | 		/* check for a config file & if found, parse it */
1047 | 		debugf( 4, "recurse = \'%s\'\n", path );
1048 | 		snprintf( temp, sizeof(temp), "%s/%s.conf", path, gMyName );
1049 | 		parseConfigFile( dictionary, temp );
1050 | 	}
1051 | }
1052 | 
1053 | /**
1054 |    Traverse the path to the source file, looking for config files.
1055 |    Apply them in the reverse order, so ones lower in the hierarchy
1056 |    can override parameters defined in higher ones.
1057 |  */
1058 | int processConfigPath( string path )
1059 | {
1060 | 	int  result = 0;
1061 | 	char temp[PATH_MAX];
1062 | 	char * absolute;
1063 | 
1064 | 	/* dirname may modify its argument, so make a copy first */
1065 | 	strncpy( temp, path, sizeof(temp) );
1066 | 	absolute = realpath( dirname(temp), NULL );
1067 | 	if ( absolute == NULL )
1068 | 	{
1069 | 		fprintf( stderr, "### Error: path \'%s\' appears to be invalid (%d: %s).\n",
1070 | 				 path, errno, strerror(errno) );
1071 | 		return -5;
1072 | 	}
1073 | 	else
1074 | 	{
1075 | 		debugf( 3, "abs = %s, cached = %s\n", absolute, gCachedPath );
1076 | 		if ( gCachedPath == NULL || strcmp( gCachedPath, absolute ) != 0 )
1077 | 		{
1078 | 			debugf( 3, "absolute = \'%s\'\n", absolute );
1079 | 			emptyDictionary( gPathDict );
1080 | 			gCachedPath = absolute;
1081 | 			_recurseConfig( gPathDict, absolute );
1082 | 		}
1083 | 
1084 | 		/* we may have picked up a new definition of {destination} as
1085 | 		 * a result of parsing different config files. If so, we need
1086 | 		 * to rebuild gSeriesDict to reflect the new destination */
1087 | 
1088 | 		string destination = findParam( kKeywordDestination );
1089 | 
1090 | 		if ( destination == NULL)
1091 | 		{
1092 | 			fprintf( stderr, "### Error: no destination defined.\n" );
1093 | 			result = -3;
1094 | 		}
1095 | 		else
1096 | 		{
1097 | 			if ( gCachedSeries == NULL || strcmp( gCachedSeries, destination ) != 0 )
1098 | 			{
1099 | 				debugf( 2, "destination = \'%s\'\n", destination );
1100 | 				// fill the dictionary with hashes of the directory names in the destination
1101 | 				emptyDictionary( gSeriesDict );
1102 | 				gCachedSeries = destination;
1103 | 				buildSeriesDictionary( destination );
1104 | 			}
1105 | 		}
1106 | 	}
1107 | 	return result;
1108 | }
1109 | 
1110 | int processFile( string path )
1111 | {
1112 |     int result = 0;
1113 | 
1114 |     processConfigPath( path );
1115 | 
1116 | 	parsePath( path );
1117 | 
1118 |     printDictionary( gFileDict );
1119 | 
1120 |     string template = findParam( kKeywordTemplate );
1121 | 
1122 |     if ( template == NULL)
1123 |     {
1124 |         fprintf( stderr, "### Error: no template found.\n" );
1125 |         result = -2;
1126 |     }
1127 |     else
1128 |     {
1129 |         debugf( 2, "template = \'%s\'\n", template );
1130 | 
1131 |         string output = buildString( template );
1132 |         string exec   = findParam( kKeywordExecute );
1133 |         if ( exec != NULL)
1134 |         {
1135 | 	        result = system( output );
1136 |         }
1137 |         else
1138 |         {
1139 | 	        printf( "%s\n", output );
1140 |         }
1141 |         free( (void *)output );
1142 |     }
1143 |     emptyDictionary( gFileDict );
1144 |     return result;
1145 | }
1146 | 
1147 | string usage =
1148 | "Command Line Options\n"
1149 | "  -d <string>  set {destination} parameter\n"
1150 | "  -t <string>  set {template} paameter\n"
1151 | "  -x           pass each output string to the shell to execute\n"
1152 | "  --           read from stdin\n"
1153 | "  -0           stdin is null-terminated (also implies '--' option)\n"
1154 | "  -v <level>   set the level of verbosity (debug info)\n";
1155 | 
1156 | 
1157 | int main( int argc, string argv[] )
1158 | {
1159 |     int  result;
1160 |     int  cnt;
1161 |     string configPath = NULL;
1162 | 	time_t secsSinceEpoch;
1163 | 	struct tm *timeStruct;
1164 | 
1165 |     gMainDict   = createDictionary( "Main" );
1166 | 	gSeriesDict = createDictionary( "Series" );
1167 | 	gPathDict   = createDictionary( "Path" );
1168 | 	gFileDict   = createDictionary( "File" );
1169 | 
1170 | 	gMyName = basename( strdup( argv[0] ) ); // posix flavor of basename modifies its argument
1171 | 
1172 |     secsSinceEpoch = time( NULL );
1173 | 	timeStruct = localtime( &secsSinceEpoch );
1174 | 	if ( timeStruct != NULL )
1175 | 	{
1176 | 		gNextYear = timeStruct->tm_year + 1900 + 1;
1177 | 	}
1178 | 
1179 |     int k = 1;
1180 | 	cnt = argc;
1181 |     for ( int i = 1; i < argc; i++ )
1182 |     {
1183 |         debugf( 4, "a: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[ i ] );
1184 | 
1185 |         // is it the config file option?
1186 |         if ( strcmp( argv[ i ], "-c" ) == 0 )
1187 |         {
1188 |             cnt -= 2;
1189 |             ++i;
1190 |             configPath = strdup( argv[ i ] );   // make a copy - argv will be modified
1191 |         }
1192 |         else
1193 |         {
1194 |             if ( i != k )
1195 |             {
1196 |                 argv[ k ] = argv[ i ];
1197 |             }
1198 |             ++k;
1199 |         }
1200 |     }
1201 |     argc = cnt;
1202 | 
1203 |     result = parseConfig( configPath );
1204 | 
1205 |     if ( configPath != NULL )
1206 |     {
1207 |         free( (void *)configPath );
1208 |         configPath = NULL;
1209 |     }
1210 | 
1211 |     k = 1;
1212 |     for ( int i = 1; i < argc && result == 0; i++ )
1213 |     {
1214 |         debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] );
1215 | 
1216 |         // is it an option?
1217 |         if (argv[i][0] == '-' )
1218 |         {
1219 |             char option = argv[i][1];
1220 |             if ( argv[i][2] != '\0' )
1221 |             {
1222 |                 fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] );
1223 |                 fprintf( stderr, "%s", usage );
1224 |                 result = -1;
1225 |             }
1226 |             else
1227 |             {
1228 |                 --cnt;
1229 | 
1230 |                 switch ( option )
1231 |                 {
1232 |                 // case 'c':   // config file already handled
1233 |                 //  break;
1234 | 
1235 |                 case 'd':   // destination
1236 |                     addParam( gMainDict, kKeywordDestination, argv[ i ] );
1237 |                     --cnt;
1238 |                     ++i;
1239 |                     break;
1240 | 
1241 |                 case 't':   // template
1242 |                     addParam( gMainDict, kKeywordTemplate, argv[ i ] );
1243 |                     --cnt;
1244 |                     ++i;
1245 |                     break;
1246 | 
1247 |                 case 'x':   // execute
1248 |                     addParam( gMainDict, kKeywordExecute, "yes" );
1249 |                     break;
1250 | 
1251 |                 case '-':   // also read lines from stdin
1252 |                     addParam( gMainDict, kKeywordStdin, "yes" );
1253 |                     break;
1254 | 
1255 |                 case '0':   // entries from stdio are terminated with NULLs
1256 |                     addParam( gMainDict, kKeywordStdin, "yes" );
1257 |                     addParam( gMainDict, kKeywordNullTermination, "yes" );
1258 |                     break;
1259 | 
1260 |                 case 'v': // verbose output, i.e. show debug logging
1261 |                     if ( i < argc - 1 )
1262 |                     {
1263 |                         ++i;
1264 |                         --cnt;
1265 | 
1266 |                         gDebugLevel = atoi( argv[i] );
1267 |                         fprintf(stderr, "verbosity = %d\n", gDebugLevel );
1268 |                     }
1269 |                     break;
1270 | 
1271 |                 default:
1272 |                     ++cnt;
1273 |                     --i; // point back at the original option
1274 |                     fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] );
1275 |                     fprintf( stderr, "%s", usage );
1276 |                     result = -1;
1277 |                     break;
1278 |                 }
1279 |             }
1280 |         }
1281 |         else
1282 |         {
1283 |             if ( i != k )
1284 |             {
1285 |                 argv[k] = argv[i];
1286 |             }
1287 |             ++k;
1288 |         }
1289 |     }
1290 |     argc = cnt;
1291 | 
1292 |     /* printDictionary( mainDict ); */
1293 | 
1294 |     for ( int i = 1; i < argc; i++ )
1295 |     {
1296 | 	    debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] );
1297 | 
1298 | 	    // is it an option?
1299 | 	    if ( argv[i][0] == '-' )
1300 | 	    {
1301 | 		    char option = argv[i][1];
1302 | 		    if ( argv[i][2] != '\0' )
1303 | 		    {
1304 | 			    fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] );
1305 | 			    result = -1;
1306 | 		    }
1307 | 		    else
1308 | 		    {
1309 | 			    --cnt;
1310 | 
1311 | 			    switch ( option )
1312 | 			    {
1313 | 			    case 'd':   // destination
1314 | 				    addParam( gMainDict, kKeywordDestination, argv[i] );
1315 | 				    --cnt;
1316 | 				    ++i;
1317 | 				    break;
1318 | 
1319 | 			    case 't':   // template
1320 | 				    addParam( gMainDict, kKeywordTemplate, argv[i] );
1321 | 				    --cnt;
1322 | 				    ++i;
1323 | 				    break;
1324 | 
1325 | 			    case 'x':   // execute
1326 | 				    addParam( gMainDict, kKeywordExecute, "yes" );
1327 | 				    break;
1328 | 
1329 | 			    case '-':   // also read lines from stdin
1330 | 				    addParam( gMainDict, kKeywordStdin, "yes" );
1331 | 				    break;
1332 | 
1333 | 			    case '0':   // entries from stdio are terminated with NULLs
1334 | 				    addParam( gMainDict, kKeywordNullTermination, "yes" );
1335 | 				    break;
1336 | 
1337 | 			    case 'v': //verbose output, i.e. debug logging
1338 | 				    if ( i < argc - 1 )
1339 | 				    {
1340 | 					    ++i;
1341 | 					    --cnt;
1342 | 
1343 | 					    gDebugLevel = atoi( argv[i] );
1344 | 					    fprintf( stderr, "verbosity = %d\n", gDebugLevel );
1345 | 				    }
1346 | 				    break;
1347 | 
1348 | 			    default:
1349 | 				    ++cnt;
1350 | 				    --i; // point back at the original option
1351 | 				    fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] );
1352 | 				    result = -1;
1353 | 				    break;
1354 | 			    }
1355 | 		    }
1356 | 	    }
1357 | 	    else
1358 | 	    {
1359 | 		    if ( i != k )
1360 | 		    {
1361 | 			    argv[k] = argv[i];
1362 | 		    }
1363 | 		    ++k;
1364 | 	    }
1365 |     }
1366 |     argc = cnt;
1367 | 
1368 |     printDictionary( gMainDict );
1369 | 
1370 |     for ( int i = 1; i < argc && result == 0; ++i )
1371 |     {
1372 |         debugf( 4, "%d: \'%s\'\n", i, argv[ i ] );
1373 |         processFile( argv[i] );
1374 |     }
1375 | 
1376 |     // should we also read from stdin?
1377 |     if ( findParam( kKeywordStdin ) != NULL )
1378 |     {
1379 |         char line[PATH_MAX];
1380 | 
1381 |         if ( findParam( kKeywordNullTermination ) != NULL )
1382 |         {
1383 |             // ...therefore lines are terminated by \0
1384 |             char * p = line;
1385 |             cnt = sizeof( line );
1386 | 
1387 |             while (!feof(stdin))
1388 |             {
1389 |                 char c = fgetc( stdin );
1390 |                 *p++ = c;
1391 |                 cnt--;
1392 | 
1393 |                 if ( c == '\0' || cnt < 1 )
1394 |                 {
1395 |                     debugf( 4, "null: %s\n", line );
1396 |                     processFile( line );
1397 | 
1398 |                     p = line;
1399 |                     cnt = sizeof( line );
1400 |                 }
1401 |             }
1402 |         }
1403 |         else
1404 |         {
1405 |             while (!feof(stdin))
1406 |             {
1407 |                 // ...otherwise lines are terminated by \n
1408 |                 fgets( line, sizeof(line), stdin );
1409 | 
1410 |                 // lop off the inevitable trailing newline(s)/whitespace
1411 |                 trimTrailingWhitespace( line );
1412 |                 debugf( 4,"eol: %s\n", line);
1413 |                 processFile( line);
1414 |             }
1415 |         }
1416 |     }
1417 | 
1418 |     // all done, clean up.
1419 | 	destroyDictionary( gFileDict );
1420 | 	destroyDictionary( gPathDict );
1421 | 	destroyDictionary( gSeriesDict );
1422 | 	destroyDictionary( gMainDict );
1423 | 
1424 |     return result;
1425 | }
1426 | 


--------------------------------------------------------------------------------
/dvr2plex.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by Paul on 4/4/2019.
 3 | //
 4 | 
 5 | #ifndef DVR2PLEX_H
 6 | #define DVR2PLEX_H
 7 | 
 8 | #if CMAKE_BUILD_TYPE == Debug
 9 | #define DEBUG 1
10 | #endif
11 | 
12 | typedef const char * string;
13 | 
14 | extern int gDebugLevel;
15 | #define debugf( level, format, ... ) do { if (gDebugLevel >= level) fprintf( stderr, format, __VA_ARGS__ ); } while (0)
16 | 
17 | #endif // DVR2PLEX_H
18 | 


--------------------------------------------------------------------------------
/keywords.hash:
--------------------------------------------------------------------------------
//
// Created by paul on 1/14/20.
//
# Input for the 'hashstrings' tool, which the build runs on every *.hash file
# to generate the header of the same name (here, keywords.h).

# prefix that appears in the generated identifiers the C code uses,
# e.g. kKeywordSeparator, kKeywordMap and fKeywordHashChar()
prefix = "Keyword"

# the character mapping array
# array starts out mapping one-to-one, i.e. input = output
#
mappings:
{
    # make the generated hashes case-insensitive
    ignoreCase = true

    # mark these characters as 'kKeywordSeparator'
    # (the C code skips them when hashing a keyword)
    Separator = " ._-"

    # presumably marks these characters as 'kKeywordIgnored' - TODO confirm against hashstrings
    Ignored = "?!"

    # mark the different styles of brackets as equivalent
    LBracket = "({["
    RBracket = ")}]"
}

# the strings to hash into the enum
# if there's a comma, first string is for symbol, second is to hash
#
# the C code refers to these as kKeyword<Name>, e.g. kKeywordSource
#
keywords = [
    "Basename",
    "Country",
    "DateRecorded",
    "DestSeries",
    "Destination",
    "Episode",
    "Execute",
    "Extension",
    "FirstAired",
    "NullTermination",
    "Path",
    "Season",
    "SeasonFolder",
    "Series",
    "Source",
    "Stdin",
    "Template",
    "Title",
    "Year"
]


--------------------------------------------------------------------------------
/patterns.hash:
--------------------------------------------------------------------------------
//
// Created by paul on 1/14/20.
//
# Input for the 'hashstrings' tool, which the build runs on every *.hash file
# to generate the header of the same name (here, patterns.h).

# prefix for the generated identifiers, e.g. kPatternSeperator
prefix = "Pattern"

# the character mapping array
# array starts out mapping one-to-one, i.e. input = output
#
mappings:
{
    # make the generated hashes case-insensitive
    ignoreCase = true

    # mark these characters as 'kPatternSeperator'
    # NOTE(review): 'Seperator' (sic) is part of the generated symbol name, so
    # correcting the spelling presumably means updating the C code that uses it
    Seperator = " ._-"

    # mark this range as 'kPatternDigit'
    Digit = "0-9"

    # mark the different styles of brackets as equivalent
    LBracket = "({["
    RBracket = ")}]"
}

# the strings to hash into the enum
# if there's a comma, first string is for symbol, second is to hash
#
# in the strings to hash, every digit is written as '0', since the Digit mapping
# above presumably makes all digits hash alike - TODO confirm against hashstrings
#
keywords = [
    "SnnEnn,S00E00",
    "SyyyyEnn,S0000E00",
    "SnnEn,S00E0",
    "SnEnn,S0E00",
    "SnEn,S0E0",
    "Ennn,E000",
    "Ennnn,E0000",
    "nXnn,0x00",
    "nnXnn,00x00",
    "TwoDigits,00",
    "FourDigits,0000",
    "SixDigits,000000",
    "EightDigits,00000000",
    "CountryUSA,(USA)",
    "CountryUS,(US)",
    "CountryUK,(UK)",
    "Year,(0000)"
]
48 | 


--------------------------------------------------------------------------------