├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── dictionary.c
├── dictionary.h
├── docs
├── README.md
└── _config.yml
├── dvr2plex.c
├── dvr2plex.h
├── keywords.hash
└── patterns.hash
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Object files
5 | *.o
6 | *.ko
7 | *.obj
8 | *.elf
9 |
10 | # Linker output
11 | *.ilk
12 | *.map
13 | *.exp
14 |
15 | # Precompiled Headers
16 | *.gch
17 | *.pch
18 |
19 | # Libraries
20 | *.lib
21 | *.a
22 | *.la
23 | *.lo
24 |
25 | # Shared objects (inc. Windows DLLs)
26 | *.dll
27 | *.so
28 | *.so.*
29 | *.dylib
30 |
31 | # Executables
32 | *.exe
33 | *.out
34 | *.app
35 | *.i*86
36 | *.x86_64
37 | *.hex
38 |
39 | # Debug files
40 | *.dSYM/
41 | *.su
42 | *.idb
43 | *.pdb
44 |
45 | # Kernel Module Compile Results
46 | *.mod*
47 | *.cmd
48 | .tmp_versions/
49 | modules.order
50 | Module.symvers
51 | Mkfile.old
52 | dkms.conf
53 |
54 | /dvr2plex
55 | .idea/
56 | cmake-*/
57 | keywords.h
58 | patterns.h
59 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | # DVR2Plex is plain C; declaring the language avoids probing for a C++ compiler.
4 | project(DVR2Plex LANGUAGES C)
5 |
6 | # Default to a Debug build only when the user has not picked a build type and
7 | # the generator is single-config. An unconditional set() would shadow any
8 | # -DCMAKE_BUILD_TYPE=... given on the command line.
9 | get_property(DVR2PLEX_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
10 | if(NOT DVR2PLEX_IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
11 |   set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type (Debug, Release, ...)" FORCE)
12 | endif()
13 |
14 | # C11 is required. Compiler extensions are left at their default (gnu11)
15 | # because dictionary.c calls strdup(), which is POSIX rather than ISO C.
16 | set(CMAKE_C_STANDARD 11)
17 | set(CMAKE_C_STANDARD_REQUIRED ON)
18 |
19 | # Every *.hash file is run through the external 'hashstrings' tool, which is
20 | # presumably writing a header of the same base name next to its input
21 | # (keywords.hash -> keywords.h) - confirm against the hashstrings documentation.
22 | # NOTE: file(GLOB) is evaluated at configure time only; re-run cmake after
23 | # adding a new .hash file.
24 | file(GLOB HASHES "${CMAKE_CURRENT_SOURCE_DIR}/*.hash")
25 | set(OUTFILES)
26 | foreach(HASH IN LISTS HASHES)
27 |
28 |   # Escape the dot so only a literal trailing ".hash" extension is rewritten.
29 |   string(REGEX REPLACE "\\.hash$" ".h" OUTPUT_FILE_NAME "${HASH}")
30 |
31 |   add_custom_command(
32 |     OUTPUT "${OUTPUT_FILE_NAME}"
33 |     COMMAND hashstrings "${HASH}"
34 |     DEPENDS "${HASH}"
35 |     COMMENT "Generating ${OUTPUT_FILE_NAME}"
36 |     VERBATIM)
37 |
38 |   list(APPEND OUTFILES "${OUTPUT_FILE_NAME}")
39 |
40 | endforeach()
41 |
42 | add_custom_target(hashes ALL DEPENDS ${OUTFILES})
43 |
44 | add_executable(DVR2Plex dvr2plex.c dvr2plex.h dictionary.c dictionary.h)
45 |
46 | # Order the executable after the generated headers, so a clean or parallel
47 | # build cannot compile the sources before the headers exist.
48 | add_dependencies(DVR2Plex hashes)
49 |
50 | # Target-scoped replacements for include_directories(.) and CMAKE_C_FLAGS;
51 | # user-supplied CFLAGS are no longer overwritten.
52 | target_include_directories(DVR2Plex PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
53 | if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
54 |   target_compile_options(DVR2Plex PRIVATE -Wall -Wextra)
55 | endif()
56 |
57 | # CMAKE_DL_LIBS names the platform's dlopen() library (empty where it is part
58 | # of libc), replacing the hard-coded x86_64 Debian/Ubuntu path to libdl.so.
59 | target_link_libraries(DVR2Plex PRIVATE ${CMAKE_DL_LIBS})
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Paul Chambers
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&utm_medium=referral&utm_content=paul-chambers/DVR2Plex&utm_campaign=Badge_Grade)
2 |
3 | [Full Documentation](https://paul-chambers.github.io/DVR2Plex/)
4 |
5 | # DVR2Plex
6 |
7 | This tool uses some fancy text processing techniques to reformat a filename
8 | into another one. To be useful, this tool needs to be used with other Linux
9 | command line tools, e.g. to copy (or hardlink) files to their new location.
10 |
11 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, I've included
12 | a simple utility called **mkln**. I'd recommend using it, at least initially,
13 | as it's *safe* - it won't replace an existing file in the destination,
14 | (will append a number inside braces to avoid the name collision), and
15 | automatically creates directories specified by the target path that don't
16 | yet exist.
17 |
18 | **Caution:** You must accept responsibility for your configuration and use
19 | of this tool, and accept that data loss is a possibility. Please be careful
20 | when using this tool.
21 |
22 | **Note:** this tool was written for a Linux environment. It *should* work
23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing
24 | there.
25 |
26 | ## Why does this exist?
27 |
28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to
29 | supply content for the Plex content library. Plex has a preferred way it
30 | likes to see the library organized, and things generally go more smoothly
31 | if everything uses the same organization and naming strategy.
32 |
33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/),
34 | which is well implemented and has some features that I find particularly
35 | useful. It keeps its recordings in a private directory, and while it is
36 | well-organized, it's in a way that's a little different to the structure
37 | that Plex prefers. More importantly, Channels DVR 'owns' the files in
38 | that folder, and other software should respect that, and not 'pull the rug'
39 | out from under Channels DVR by messing with those files behind its back.
40 |
41 | You could point Plex at the Channels directories holding the
42 | recordings, and Plex would figure things out. However, it should treat
43 | those directory contents as read-only, otherwise Plex will be altering
44 | files that Channels owns.
45 |
46 | I initially wrote a shell script that hardlinked the recordings Channels
47 | DVR made in its private directory into the 'right place' in my Plex
48 | library. A 'hard link' doesn't use any more disk space, and has the
49 | positive attribute that the hard link in the Plex Library can be moved
50 | and/or renamed without affecting the one in the Channels DVR 'private'
51 | directory. The inverse is also true - the Channels DVR can delete its
52 | file in the 'private' directory, without affecting the other link to it
53 | in the Plex library, so it will remain. This is very handy when used
54 | with the 'only keep *n* episodes' in Channels DVR (or
55 | [kmttg](https://sourceforge.net/projects/kmttg) and
56 | [jellyfin](https://jellyfin.org/) for that matter).
57 |
58 | *Problem solved, right?* Well, mostly...
59 |
60 | The biggest issue with such a simple approach is that the world
61 | hasn't yet settled on how a series is named, and possibly never will.
62 | For example, for a series like "Marvel's Agents of S.H.I.E.L.D." there
63 | are a number of variations on that title that you'll see in the wild.
64 | Variations like "Marvel's Agents of S.H.I.E.L.D. (2013)", or
65 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period),
66 | all the way to "marvels.agents.of.shield".
67 |
68 | If that isn't accounted for, then differently-named directories will
69 | accumulate, containing episodes of the same series, often duplicates.
70 | This only gets worse as the number of content sources increases.
71 |
72 | Not good. This is the itch this tool scratches.
73 |
74 | ## OK, but what does it *do?*
75 |
76 | In a nutshell, it's a specialized string manipulation tool. You feed it
77 | the name of a media file, it parses out the series, season, episode,
78 | episode title, etc. from the name, and provides a 'template' system
79 | that allows you to easily reassemble a new name for the destination
80 | from the parts it extracted from the source filename.
81 |
82 | *That sounds like something you could do with `sed` or `awk`. So why
83 | write this?*
84 |
85 | The pattern matching is done in a loose/fuzzy way that would be
86 | impractical to do in bash script or command-line string manipulation
87 | tools.
88 |
89 | It uses a fundamentally different technique - character-mapped hashing
90 | - than the usual simple character-by-character string comparison or
91 | regular expression methods.
92 |
93 | See "How does it work?" below, if you're curious about the details.
94 |
95 | The 'template' describes the form of the string that this tool should
96 | output. The component parts are substituted in the appropriate place
97 | where you put something like "{episode}". There are quite a number of
98 | these parameters:
99 |
100 | | Parameter Name | Description |
101 | |--- |--- |
102 | | {source} | The path to the source file, as passed to this tool. |
103 | | {path} | The 'dirname' part of the source (no trailing slash) |
104 | | {basename} | The 'basename' of the source (minus the extension) |
105 | | {extension} | The extension. Separated so that if what you want to do is convert containers, you can write something like {path}/{basename}.mkv as the destination in the template |
106 | | {series} | The raw name of the series (as extracted from the source) |
107 | | {season} | Always at least two digits, zero-padded |
108 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise "Season {season}" |
109 | | {episode} | Always at least two digits, zero-padded |
110 | | {title} | The episode title |
111 | | {destseries} | This is the target folder that the tool determined (by fuzzy match) is the right destination for the file. More details below. |
112 | | {destination} | The destination directory for the file. Also scanned as part of the fuzzy matching |
113 | | {firstaired} | The date this episode first aired *(specific to Channels DVR files)* |
114 | | {daterecorded} | The date/time Channels DVR recorded this *(specific to Channels DVR files)* |
115 | | {template} | It's a parameter too (though you can't use it in a template, obviously) |
116 |
117 | This is only the predefined list of parameters that the parsing will
118 | pre-populate automatically - except for {destination} and {template},
119 | which need to be defined by the user. They can either be defined on the
120 | command line, or in a config file - the tool looks for
121 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will
122 | process the config file defined by the `-c` command line option, before
123 | finishing with any command line options. Parameters can be defined
124 | multiple times, the last one wins. So you could, for example, define a
125 | default {title} as '(unknown)' in a config file, and it would be used
126 | if the file name parsing didn't find an episode title.
127 |
128 | DVR2Plex also looks for config files in the filesystem hierarchy above
129 | the source file. This is particularly useful for providing different
130 | templates and destinations for **TV** vs. **Movie** recordings, or
131 | handling a system running multiple DVR software (as I am). Obviously
132 | this check is made for each directory that DVR2Plex is asked to process
133 | files within, and so if found, these config files have 'the last say' in
134 | setting parameters, overriding everything else.
135 |
136 | You may also define your own parameters in the config file, and use them
137 | in the template. And if the output-building code can't find a parameter
138 | name that matches in its dictionaries, it will also look for an
139 | environment variable with that name (case-sensitive, in this case). So
140 | {HOME} will be replaced by the path to the user's home directory (i.e.
141 | the equivalent of '~' in the shell)
142 |
143 | The assumption is that at least one of the config files would contain
144 | at least the {destination} and {template} parameters, since those are
145 | likely to be consistent on a given machine.
146 |
147 | For example, `/etc/DVR2Plex.conf` might contain:
148 | ```
149 | destination = /home/video/TV
150 | template = mkln "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title? @}{extension}"
151 | ```
152 | So assuming that the source file was
153 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg`
154 | and a directory existed called `/home/video/TV/Person of Interest (2011)`
155 | then that template would output (or execute):
156 |
157 | `mkln "/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
158 |
159 | but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg`
160 | would also create the same destination:
161 | `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
162 |
163 | **Caution** DVR2Plex will blindly execute whatever you tell it to execute. It
164 | is just manipulating strings, after all. It has no notion of the quality of
165 | the source file vs. an existing destination file, and will happily overwrite
166 | a high quality file with a lower quality one if that's what you tell it to do.
167 |
168 | Be aware of this when creating a template you expect DVR2Plex to execute directly.
169 |
170 | ### Conditional Expansions
171 | *But wait, what on earth does {episode?E@:-} mean?*
172 |
173 | When a parameter isn't defined, it expands to nothing. Which is all well
174 | and good, except if there's some surrounding characters that need to
175 | disappear too. {episode?E@:-} means: if {episode} is defined, output 'E'
176 | followed by the contents of {episode}; otherwise output just '-'.
177 | Similarly {seasonfolder} would normally be seen in a template written
178 | as {seasonfolder?@/} so that the path separator is only included if
179 | {seasonfolder} is defined.
180 |
181 | This is akin to the ternary operator in C, if you're a programmer -
182 | up to the '?' is the thing to test, after the '?' and before the ':'
183 | or '}' is the string to output if it is defined ('true'), between the
184 | ':' and '}' is the string to output if it isn't defined ('false').
185 | Where an '@' appears, insert the value of the parameter.
186 |
187 | ## How does it work?
188 |
189 | The tool uses modified hashing to do comparisons. The hashing is
190 | modified by mapping each character through a mapping table first, so
191 | that particular characters can be mapped to another, or ignored
192 | completely. For example, upper case characters are mapped to lower case,
193 | so "UPPER" has the same hash as "upper" and "UpPeR"
194 |
195 | DVR2Plex first builds up a list of hashes for the directories
196 | found in the {destination} directory.
197 |
198 | The matching algorithm isn't confused by differing case, missing
199 | apostrophes, presence or absence of a year or country (e.g.
200 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in
201 | the destination).
202 |
203 | This is particularly useful for the worst offenders. For example, if you
204 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)",
205 | the fuzzy matching can deal with something in the source like
206 | "marvels.agents.of.shield" and still put it in the correct folder.
207 |
208 | This fixes all-lowercase series for example, or random "Of"/"of" confusion
209 | (or "MythBusters" vs. "Mythbusters")
210 |
211 | Some characters are often dropped, like apostrophes or the trailing
212 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's"
213 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T."
214 |
215 | All digits are mapped to '0' (though only for the pattern matching), so
216 | we have a constant hash for patterns like S02E10, S01E05, etc. This
217 | allows us to easily find the several patterns we're looking for, very
218 | efficiently. Those patterns are mostly season/episode patterns: SnnEnn,
219 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify
220 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR
221 | recordings, along with nnnn-nn-nn-nnnn for the date recorded.
222 |
223 | There are in fact two character maps used for hashing. One for finding
224 | patterns, and another for looking up parameters. The main difference is
225 | that the parameter mapping doesn't map all digits to zero.
226 |
227 | Internally, there are three 'dictionaries' used for searching, each is
228 | a simple key-value list, using the hash as the key. The 'main'
229 | dictionary contains all the parameters that will remain the same for
230 | the entire run. This is the dictionary populated from the config files
231 | and command line options. There is a 'file' dictionary, which contains
232 | the parsed value for the last source parsed. This is discarded and
233 | rebuilt for each source, so per-file values don't carry over from one
234 | source to the next.
235 |
236 | The third dictionary is the 'series' one, which is populated with hashes
237 | of the directory names that it finds by doing a scan of the {destination}
238 | directory. The assumption is that these are essentially the canonical
239 | names/destinations for the known TV series. For a given series directory,
240 | there may be more than one dictionary entry, as the hash without the
241 | year or country is included, as well as with it included. So the hash
242 | for "Person of Interest" is stored as well as "Person of Interest (2011)",
243 | both pointing to the target "Person of Interest (2011)" directory.
244 | Thus either form will match, and point to the right destination directory.
245 | This is the mechanism behind the {destseries} parameter.
246 |
--------------------------------------------------------------------------------
/dictionary.c:
--------------------------------------------------------------------------------
1 | //
2 | // Created by root on 8/22/19.
3 | //
4 | #include "dvr2plex.h"
5 | #include
6 | #include
7 | #include
8 |
9 | #include "dictionary.h"
10 |
11 | // Allocate a new, empty dictionary labelled with 'name'.
12 | // The 'name' pointer is stored as-is (not copied), so the string it points
13 | // to must outlive the dictionary.
14 | // Returns the new dictionary, or NULL if the allocation failed.
15 | tDictionary * createDictionary( const char * name )
16 | {
17 |     // calloc() zero-fills the struct, so the entry list starts out empty
18 |     tDictionary * result = (tDictionary *)calloc( 1, sizeof(tDictionary) );
19 |     if ( result != NULL )
20 |     {
21 |         result->name = name;
22 |     }
23 |     return result;
24 | }
17 |
18 | // Remove every entry from 'dictionary', freeing each entry and the value
19 | // string it holds. The dictionary itself stays allocated and can be refilled.
20 | // Passing NULL is a no-op.
21 | void emptyDictionary( tDictionary * dictionary )
22 | {
23 |     if ( dictionary == NULL )
24 |     {
25 |         return;
26 |     }
27 |
28 |     // detach the list first, so the dictionary already reads as empty
29 |     tParam * p = dictionary->head;
30 |     dictionary->head = NULL;
31 |
32 |     while ( p != NULL )
33 |     {
34 |         tParam * next = p->next;    // fetch the link before the entry is freed
35 |
36 |         free( (void *) p->value );  // free(NULL) is a no-op; the cast drops const
37 |         free( p );
38 |         p = next;
39 |     }
40 | }
35 |
36 | // Free every entry in 'dictionary' and then the dictionary itself.
37 | // The string passed to createDictionary() as the name is not freed.
38 | // Passing NULL is a no-op, mirroring free().
39 | void destroyDictionary( tDictionary * dictionary )
40 | {
41 |     if ( dictionary != NULL )
42 |     {
43 |         emptyDictionary( dictionary );
44 |         free( dictionary );
45 |     }
46 | }
41 |
42 | // Dump the dictionary's name and then one line per entry through debugf()
43 | // (first argument 3 - presumably the debug level; confirm against debugf).
44 | // Each entry is shown as the string lookupHash() returns for its hash,
45 | // followed by its value in quotes. Does nothing when 'dictionary' is NULL.
46 | void printDictionary( tDictionary * dictionary )
47 | {
48 |     if ( dictionary == NULL )
49 |     {
50 |         return;
51 |     }
52 |
53 |     debugf( 3, "...%s dictionary...\n", dictionary->name);
54 |
55 |     // entries are listed from the head of the list onwards
56 |     for ( tParam * entry = dictionary->head; entry != NULL; entry = entry->next )
57 |     {
58 |         debugf( 3, "%16s: \"%s\"\n", lookupHash(entry->hash), entry->value );
59 |     }
60 | }
56 |
57 | // Add a new entry for 'hash' holding a private copy of 'value'.
58 | // The entry is pushed onto the head of the list, so findValue(), which scans
59 | // from the head, returns the most recently added value for a given hash.
60 | // Returns 0 on success. Returns -1 (leaving the dictionary unchanged) if
61 | // 'dictionary' or 'value' is NULL, or if memory could not be allocated.
62 | int addParam( tDictionary * dictionary, tHash hash, const char * value )
63 | {
64 |     if ( dictionary == NULL || value == NULL )
65 |     {
66 |         return -1;
67 |     }
68 |
69 |     tParam * p = malloc( sizeof(tParam) );
70 |     if ( p == NULL )
71 |     {
72 |         return -1;
73 |     }
74 |
75 |     p->value = strdup( value );
76 |     if ( p->value == NULL )
77 |     {
78 |         // don't store a half-built entry and report success for it
79 |         free( p );
80 |         return -1;
81 |     }
82 |
83 |     p->hash = hash;
84 |     p->next = dictionary->head;
85 |     dictionary->head = p;
86 |
87 |     return 0;
88 | }
75 |
76 | // Look up 'hash' in 'dictionary'.
77 | // Returns the value of the first matching entry, scanning from the head of
78 | // the list, or NULL when no entry has that hash. The returned pointer refers
79 | // to the string held by the entry; it is not a copy.
80 | string findValue( tDictionary * dictionary, tHash hash )
81 | {
82 |     for ( tParam * entry = dictionary->head; entry != NULL; entry = entry->next )
83 |     {
84 |         if ( entry->hash == hash )
85 |         {
86 |             return entry->value;
87 |         }
88 |     }
89 |     return NULL;
90 | }
92 |
--------------------------------------------------------------------------------
/dictionary.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by root on 8/22/19.
3 | //
4 |
5 | #ifndef DVR2PLEX_DICTIONARY_H
6 | #define DVR2PLEX_DICTIONARY_H
7 |
8 | typedef unsigned long tHash;    // key type: entries are stored and looked up by this hash value
9 |
10 | typedef struct tParam {         // one key/value entry in a dictionary's singly-linked list
11 | struct tParam * next;           // following entry, or NULL at the end of the list
12 | const char * value;             // copy made by addParam() with strdup(); freed by emptyDictionary()
13 | tHash hash;                     // key that findValue() compares against
14 | } tParam;
15 |
16 | typedef struct {                // a named list of tParam entries
17 | tParam * head;                  // most recently added entry (addParam() pushes at the head); NULL when empty
18 | const char * name;              // label printed by printDictionary(); stored as given, never copied or freed here
19 | } tDictionary;
20 |
21 | tDictionary * createDictionary( const char * name );                 // allocate an empty dictionary labelled 'name'
22 | void emptyDictionary( tDictionary * dictionary );                    // free every entry, keep the dictionary itself
23 | void destroyDictionary( tDictionary * dictionary );                  // free every entry and the dictionary itself
24 | string lookupHash( tHash );                                          // not defined in dictionary.c; printDictionary() prints its result as the entry label. NOTE(review): 'string' is not declared in this header - presumably from dvr2plex.h, confirm
25 | void printDictionary( tDictionary * dictionary);                     // dump all entries through debugf(); no-op for NULL
26 | int addParam( tDictionary * dictionary, tHash hash, string value );  // push a copy of 'value' at the head; 0 on success, -1 on failure
27 | string findValue( tDictionary * dictionary, tHash hash );            // value of the first entry (from the head) with this hash, or NULL
28 |
29 | #endif // DVR2PLEX_DICTIONARY_H
30 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | [](https://www.codacy.com/manual/paul-chambers/DVR2Plex?utm_source=github.com&utm_medium=referral&utm_content=paul-chambers/DVR2Plex&utm_campaign=Badge_Grade)
2 |
3 | # DVR2Plex
4 |
5 | **Caution:** If you are new to the linux command line, and/or are unfamiliar
6 | with common linux tools like 'find', I wouldn't recommend this as a good first
7 | project because of the danger of overwriting existing files.
8 |
9 | This tool uses some fancy text processing techniques to reformat a filename
10 | into another one. To be useful, this tool needs to be used with other Linux
11 | command line tools, e.g. to copy (or hardlink) files to their new location.
12 |
13 | Since DVR2Plex isn't actually doing the copy/hardlinking itself, it
14 | cannot prevent the tool doing the copy from blindly overwriting an existing
15 | high-quality file with a lower quality one. Thus it's best to have your
16 | template generate a destination filename that won't overwrite any existing
17 | files.
18 |
19 | You must accept responsibility for your configuration and use of this tool,
20 | and accept that data loss is a possibility. Be careful when using this tool.
21 |
22 | **Note:** this tool was written for a Linux environment. It *should* work
23 | fine inside WSL (Windows Subsystem for Linux), but has had little testing
24 | there.
25 |
26 | ## Why does this exist?
27 |
28 | I'm a long-time user of [Plex](https://plex.tv/), and use related tools to
29 | supply content for the Plex content library. Plex has a preferred way it
30 | likes to see the library organized, and things generally go more smoothly
31 | if everything uses the same organization and naming strategy.
32 |
33 | I'm also a fan of the [Channels DVR](https://getchannels.com/dvr-server/),
34 | which is particularly well implemented and has some features that I find
35 | particularly useful. It keeps its recordings in a private directory, and
36 | while it is well-organized, it's in a way that's a little different to
37 | the structure that Plex prefers. Most importantly, Channels DVR 'owns'
38 | the files in that folder, and other software must respect that, and not
39 | 'pull the rug out' from under Channels by messing with those files.
40 |
41 | You could point Plex at the Channels directories holding the
42 | recordings, and Plex would figure things out. But it should treat those
43 | directory contents as read-only, otherwise Plex will be altering files
44 | that Channels owns.
45 |
46 | *So,* I initially wrote a shell script that hardlinked the recordings
47 | Channels DVR made in its private directory into the 'right place' in
48 | my Plex library. A hard link doesn't use any more disk space, but does
49 | mean the hard link in the Plex Library can be moved and/or renamed
50 | without affecting the one in the Channels DVR 'private' directory.
51 | The inverse is also true - the Channels DVR can delete its file in
52 | the 'private' directory, but the other link to it in the Plex library
53 | will remain. Which is very handy if you want to tell Channels DVR to
54 | 'only keep *n* episodes' (or [kmttg](https://sourceforge.net/projects/kmttg),
55 | for that matter).
56 |
57 | *Problem solved, right?* Well, mostly...
58 |
59 | The biggest issue with such a simple approach is that the world
60 | hasn't yet settled on how a series is named, and possibly never will.
61 | For example, for a series like "Marvel's Agents of S.H.I.E.L.D." there
62 | are a number of variations on that title that you'll see in the wild.
63 | Variations like "Marvel's Agents of S.H.I.E.L.D. (2013)", or
64 | "Marvels Agents of S.H.I.E.L.D" (no single quote, no trailing period),
65 | all the way to "marvels.agents.of.shield".
66 |
67 | If that isn't accounted for, then differently-named directories will
68 | accumulate, containing episodes of the same series, often duplicates.
69 | This only gets worse as the number of content sources increases.
70 |
71 | Not good. This is the itch this tool scratches.
72 |
73 | ## OK, but what does it *do?*
74 |
75 | In a nutshell, it's a specialized string manipulation tool. You feed it
76 | the name of a media file, it parses out the series, season, episode,
77 | episode title, etc. from the name, and provides a 'template' system
78 | that allows you to easily reassemble a new name for the destination
79 | from the parts it extracted from the source filename.
80 |
81 | *That sounds like something you could do with `sed` or `awk`. So why
82 | write this?*
83 |
84 | The pattern matching is done in a loose/fuzzy way that would be
85 | impractical to do in bash script or command-line string manipulation
86 | tools.
87 |
88 | It uses a fundamentally different technique - character-mapped hashing
89 | - than the usual simple character-by-character string comparison or
90 | regular expression methods.
91 |
92 | See "How does it work?" below, if you're curious about the details.
93 |
94 | The 'template' describes the form of the string that this tool should
95 | output. The component parts are substituted in the appropriate place
96 | where you put something like "{episode}". There are quite a number of
97 | these parameters:
98 |
99 | | parameter name | description |
100 | |--- |--- |
101 | | {source} | The path to the source file, as passed to this tool. |
102 | | {path} | The 'dirname' part of the source (no trailing slash) |
103 | | {basename} | The 'basename' of the source (without the extension) |
104 | | {extension} | The extension. separate so that if what you want to do is convert containers, you can use something like {path}/{basename}.mkv |
105 | | {series} | The raw name of the series (as extracted from the source) |
106 | | {season} | Always at least two digits, zero-padded |
107 | | {seasonfolder} | If the season is zero, this will be "Specials", otherwise equivalent to "Season {season}" |
108 | | {episode} | Always at least two digits, zero-padded |
109 | | {title} | The episode title |
110 | | {destseries} | This is the target folder that the tool determined, by a fuzzy match, is the right destination for the file. More details below. |
111 | | {destination} | The destination directory for the file. Also used as part of the fuzzy matching |
112 | | {firstaired} | the date this episode first aired *(specific to Channels DVR files)* |
113 | | {daterecorded} | the date/time Channels DVR recorded this *(specific to Channels DVR files)* |
114 | | {template} | it's a parameter too, though you can't use it in a template |
115 |
116 | This is only the predefined list of parameters that the parsing will
117 | pre-populate automatically - except for {destination} and {template},
118 | which need to be defined by the user. They can either be defined on the
119 | command line, or in a config file - the tool looks for
120 | `/etc/DVR2Plex.conf` and `~/.config/DVR2Plex.conf`, then will
121 | process the config file defined by the `-c` command line option, before
122 | finishing with any command line options. Parameters can be defined
123 | multiple times, the last one wins. So you could, for example, define a
124 | default {title} as '(unknown)' in a config file, and it would be used
125 | if the file name parsing didn't find an episode title.
126 |
127 | You may also define your own parameters in the config file, and use them
128 | in the template. And if the output-building code can't find a parameter
129 | name that matches in its dictionaries, it will also look for an
130 | environment variable with that name (case-sensitive, in this case). So
131 | {HOME} will be replaced by the path to the user's home directory (i.e.
132 | the equivalent of '~')
133 |
134 | The assumption is that a config file would contain at least the
135 | {destination} and {template} parameters, since those are likely to be
136 | consistent on a given machine. For example, `/etc/DVR2Plex.conf`
137 | might contain:
138 | ```
139 | destination = /home/video/TV
140 | template = "{source}" "{destination}/{destseries?@/}{seasonfolder?@/}{destseries?@ }{season?S@}{episode?E@:-}{title? @}{extension}"
141 | ```
142 | So assuming that the source file was
143 | `/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg`
144 | and a directory existed called `/home/video/TV/Person of Interest (2011)`
145 | then that template would output:
146 |
147 | `"/home/Channels/TV/Person of Interest/Person of Interest S02E16 2013-02-21 Relevance 2018-12-30-0000.mpg" "/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
148 |
149 | but perhaps more impressive is that a source file of `/home/paul/downloads/person.of.interest.2x16.relevance.mpg`
150 | would also create the same destination of
151 | `"/home/video/TV/Person of Interest (2011)/Season 02/Person of Interest (2011) S02E16 Relevance.mpg"`
152 |
153 | **Caution:** It's a good practice to include something in the template
154 | that is guaranteed to make the generated name unique, so that it won't
155 | overwrite an existing file in the destination (potentially a lower
156 | quality version). Since Channels DVR recordings have an .mpg extension,
157 | you'll probably be OK, but better safe than sorry.
158 |
159 | ### Conditional Expansions
160 | *But wait, what on earth does {episode?E@:-} mean?*
161 |
162 | When a parameter isn't defined, it expands to nothing. Which is all well
163 | and good, except if there's some surrounding characters that need to
164 | disappear too. {episode?E@:-} means: if {episode} is defined, output 'E'
165 | followed by the contents of {episode}; otherwise output just '-'.
166 | Similarly {seasonfolder} would normally be seen in a template written
167 | as {seasonfolder?@/} so that the path separator is only included if
168 | {seasonfolder} is defined.
169 |
170 | This is akin to the ternary operator in C, if you're a programmer -
171 | up to the '?' is the thing to test, after the '?' and before the ':'
172 | or '}' is the string to output if it is defined ('true'), between the
173 | ':' and '}' is the string to output if it isn't defined ('false').
174 | Where an '@' appears, insert the value of the parameter.
175 |
176 | ## How does it work?
177 |
178 | The tool uses modified hashing to do comparisons. The hashing is
179 | modified by mapping each character through a mapping table first, so
180 | that particular characters can be mapped to another, or ignored
181 | completely. For example, upper case characters are mapped to lower case,
182 | so "UPPER" has the same hash as "upper" or "UpPeR"
183 |
184 | DVR2Plex first builds up a list of hashes for the directories
185 | found in the {destination} directory.
186 |
187 | The matching algorithm is not fazed by differing case, missing
188 | apostrophes, presence or absence of a year or country (e.g.
189 | "hells_kitchen" will match a directory named "Hell's Kitchen (US)" in
190 | the destination.
191 |
192 | This is particularly useful for the worst offenders. For example, if you
193 | have a destination folder called "Marvel's Agents of S.H.I.E.L.D. (2013)".
194 | The fuzzy matching can deal with something in the source like
195 | "marvels.agents.of.shield" and still put it in the correct folder.
196 |
197 | This fixes all-lowercase series for example, or random "Of"/"of" confusion
198 | (or "MythBusters" vs. "Mythbusters")
199 |
200 | Some characters are often dropped, like apostrophes or the trailing
201 | period of an acronym, like S.W.A.T., so those are ignored. "Marvel's"
202 | matches "marvels", "swat" or "S.W.A.T" matches "S.W.A.T."
203 |
204 | All digits are mapped to '0' (though only for the pattern matching), so
205 | we have a constant hash for patterns like S02E10, S01E05, etc. This
206 | allows us to easily find the several patterns we're looking for, very
207 | efficiently. Those patterns are mostly season/episode patterns: SnnEnn,
208 | SnnnnEnn, nXnn, nnXnn and a few less-common variations. We also identify
209 | nnnn-nn-nn as the pattern for the 'first aired' date for Channels DVR
210 | recordings, along with nnnn-nn-nn-nnnn for the date recorded.
211 |
212 | There are in fact two character maps used for hashing. One for finding
213 | patterns, and another for looking up parameters. The main difference is
214 | that the parameter mapping doesn't map all digits to zero.
215 |
216 | Internally, there are three 'dictionaries' used for searching, each is
217 | a simple key-value list, using the hash as the key. The 'main'
218 | dictionary contains all the parameters that will remain the same for
219 | the entire run. This is the dictionary populated from the config files
220 | and command line options. There is a 'file' dictionary, which contains
221 | the parsed value for the last source parsed. This is discarded and
222 | rebuilt for each source, so per-file values don't carry over from one
223 | source to the next.
224 |
225 | The third dictionary is the 'series' one, which is populated with hashes
226 | of the directory names that it finds by doing a scan of the {destination}
227 | directory. The assumption is that these are essentially the canonical
228 | names/destinations for the known TV series. For a given series directory,
229 | there may be more than one dictionary entry, as the hash without the
230 | year or country is included, as well as with it included. So the hash
231 | for "Person of Interest" is stored as well as "Person of Interest (2011)",
232 | both pointing to the target "Person of Interest (2011)" directory.
233 | Thus either form will match, and point to the right destination directory.
234 | This is the mechanism behind the {destfolder} parameter.
235 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-architect
--------------------------------------------------------------------------------
/dvr2plex.c:
--------------------------------------------------------------------------------
1 | /**
2 | Copyright © Paul Chambers, 2019.
3 |
4 | @ToDo Switch to UTF-8 string handling, rather than relying on ASCII backwards-compatibility
5 | */
6 |
7 | #define _XOPEN_SOURCE 700
8 | #include
9 |
10 | #include "dvr2plex.h"
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include // for basename()
20 | #include
21 | #define __USE_MISC // dirent.d_type is linux-specific, apparently
22 | #include
23 | #define __USE_GNU
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | #include "dictionary.h"
30 |
31 |
32 | /* hashes for patterns we are scanning for in the filename
33 | this hash table is used to generate hashes used to match patterns.
34 | it maps all digits to the same value, maps uppercase letters to
35 | lowercase, and ignores several characters completely.
36 | */
37 | #include "patterns.h"
38 |
39 | /*
40 | Hashes for the keywords/parameter names in the template. This hash table is also
41 | used for series names.
42 |
43 | Periods are ignored, because the trailing one is often omitted from series like
44 | "S.W.A.T." and "Marvel's Agents of S.H.I.E.L.D.". By ignoring periods,
45 | "S.W.A.T.", "S.W.A.T" and "SWAT" will all result in the same hash value.
46 |
47 | Since periods may also be used as a separator, we have to treat ' ' and '_' as
48 | equivalent, or the hash for a space-separated name won't match the hash of a
49 | period- or underscore-separated one.
50 |
51 | In other words, ' ', '_' and '.' do not contribute to the series hash. Similarly,
52 | apostrophes are also often omitted ("Marvel's" becomes "Marvels"), so it is
53 | similarly ignored when generating a hash, along with '?' (e.g. "Whose Line Is It
54 | Anyway?") and '!' ("I'm a Celebrity...Get Me Out of Here!").
55 |
56 | "Marvel's Agents of S.H.I.E.L.D. (2017)" is perhaps one of the most difficult
57 | matching examples I've seen in the wild. There are so many ways to mangle that.
58 |
59 | ':' is usually converted to '-' or omitted entirely, so ignore those, too.
60 |
61 | Left and right brackets are also mapped to be equivalent, e.g. [2017] has the
62 | same hash as (2017).
63 | */
64 | #include "keywords.h"
65 |
/* Program name; inserted into config file names such as "/etc/<name>.conf"
   and "<dir>/<name>.conf" by the config-parsing functions in this file. */
string gMyName;
/* Debug verbosity. Presumably consulted by debugf(), which is defined
   outside this section - TODO confirm. */
int gDebugLevel = 0;
/* Inclusive upper bound used by storeToken() when deciding whether a
   bracketed number is a plausible year (1890 < year <= gNextYear).
   NOTE(review): the initial value of 1895 is presumably replaced at
   startup - confirm where. */
unsigned int gNextYear = 1895;

/* Parameter dictionaries. findParam() searches file, then path, then main. */
tDictionary * gMainDict;    /* filled by parseConfig() from the global config files */
tDictionary * gPathDict;    /* filled by processConfigPath() from config files along the source path */
tDictionary * gFileDict;    /* values parsed from the current source; emptied by processFile() */
tDictionary * gSeriesDict;  /* series-name hashes -> directory names, filled by addSeries() */

/* Directory whose config files are currently loaded into gPathDict. */
string gCachedPath = NULL;
/* {destination} value that gSeriesDict was last built from. */
string gCachedSeries = NULL;

/* One token of a file name, as produced by tokenizeName(). */
typedef struct sToken
{
    struct sToken * next;       /* next token in the singly-linked list, or NULL */
    string start;               /* first character of the token's text */
    string end;                 /* position of the terminator written after the token */
    tHash hash;                 /* pattern hash after checkHash(), or a merged-date hash */
    unsigned char seperator;    /* the character that originally followed the token */
} tToken;

/* List head; tokenizeName() hangs the real tokens off gTokenList.next. */
tToken gTokenList;
88 |
/**
 * Trim any trailing whitespace from the end of the string, in place.
 *
 * The string is scanned once, remembering the position just past the last
 * non-whitespace character; the terminator is then written there. A string
 * consisting only of whitespace becomes empty.
 *
 * @param line  line to be trimmed; NULL is accepted and ignored
 */
void trimTrailingWhitespace(char * line)
{
    if ( line == NULL )
    {
        return;
    }

    char * newEnd = line;   /* one past the last non-whitespace character seen so far */

    for ( char * p = line; *p != '\0'; ++p )
    {
        /* isspace() is only defined for values representable as unsigned char
           (or EOF), so bytes >= 0x80 must not be passed as a negative char */
        if ( !isspace( (unsigned char)*p ) )
        {
            newEnd = p + 1;
        }
    }
    *newEnd = '\0';
}
112 |
113 | string lookupHash(tHash hash)
114 | {
115 | tKeywordHashMapping * keywordMap = KeywordHashLookup;
116 |
117 | while ( keywordMap->key != 0 )
118 | {
119 | if ( hash == keywordMap->key )
120 | {
121 | return keywordMap->label;
122 | }
123 | keywordMap++;
124 | }
125 |
126 | tPatternHashMapping * patternMap = PatternHashLookup;
127 |
128 | while ( patternMap->key != 0 )
129 | {
130 | if ( hash == patternMap->key )
131 | {
132 | return patternMap->label;
133 | }
134 | patternMap++;
135 | }
136 |
137 | return "";
138 | }
139 |
140 | /**
141 | * @brief look in the three dictionaries for the first occurance of a hash value
142 | * @param hash
143 | * @return
144 | */
145 | string findParam( tHash hash )
146 | {
147 | string result;
148 |
149 | result = findValue( gFileDict, hash );
150 | if ( result == NULL )
151 | {
152 | result = findValue( gPathDict, hash );
153 | }
154 | if ( result == NULL )
155 | {
156 | result = findValue( gMainDict, hash );
157 | }
158 | return result;
159 | }
160 |
161 | /**
162 | Hashes the 'series' using the 'keyword' hash table, since comparing series names needs
163 | slightly different logic than scanning for patterns. Separators (spaces, periods,
164 | underscores) are ignored completely. As are \', !, amd ?, since those are frequently
165 | omitted. Upper case letters are mapped to lower case since those are also very
166 | inconsistent (no UTF-8 handling yet, though). and '&' is expanded to 'and' in the
167 | hash, so both forms will hash to the same value.
168 |
169 | Since a series name may or may not be suffixed by a year or country surrounded
170 | by brackets (e.g. (2019) or (US)). So a hash is added whenever a left bracket
171 | is encountered, so the hash for 'Some Series' and 'Some Series (2019)' are both
172 | stored in the series dictionary, so there will be a hash available to match
173 | either with or without the suffix.
174 | */
175 | void addSeries( string series )
176 | {
177 | tHash result = 0;
178 | unsigned char * s = (unsigned char *)series;
179 | unsigned char c;
180 |
181 | do {
182 | c = kKeywordMap[ *s++ ];
183 | switch ( c )
184 | {
185 | // we hash the '&' character as if 'and' was used. so both forms generate the same hash
186 | // e.g. the hash of 'Will & Grace' will match the hash of 'Will and Grace'
187 | case '&':
188 | result = fKeywordHashChar( result, 'a' );
189 | result = fKeywordHashChar( result, 'n' );
190 | result = fKeywordHashChar( result, 'd' );
191 | break;
192 |
193 | case kKeywordLBracket:
194 | // we found something bracketed, e.g. (uk) or (2019), so we also add the
195 | // intermediate hash to the dictionary, before we hash the bracketed content.
196 | // Then if we hash the same series with the year omitted, for example, will
197 | // still match something. Though we can't do much about a file that omits a
198 | // a year or country, e.g. 'MacGyver' instead of 'MacGyver (2016)', or
199 | // 'Hell's Kitchen' instead of 'Hell's Kitchen (US)'
200 | //
201 | // Note: if there are multiple left brackets encountered, there will be
202 | // multiple intermediate hashes added.
203 |
204 | addParam( gSeriesDict, result, series );
205 | result = fKeywordHashChar( result, c );
206 | break;
207 |
208 | case '\0':
209 | case kKeywordSeparator:
210 | case kKeywordIgnored:
211 | break;
212 |
213 | default:
214 | result = fKeywordHashChar( result, c );
215 | break;
216 | }
217 | } while ( c != '\0' );
218 |
219 | // also add the hash of the full string, including any trailing bracketed stuff
220 | addParam( gSeriesDict, result, series );
221 | }
222 |
223 | static int scanDirFilter( const struct dirent * entry)
224 | {
225 | int result = 0;
226 |
227 | result = ( entry->d_name[0] != '.' && entry->d_type == DT_DIR );
228 |
229 | // debugf( 3, "%s, 0x%x, %d\n", entry->d_name, entry->d_type, result );
230 | return result;
231 | }
232 |
233 | int buildSeriesDictionary( string path )
234 | {
235 | struct dirent **namelist;
236 | int n;
237 |
238 | n = scandir( path, &namelist, scanDirFilter, alphasort);
239 | if ( n < 0 ) {
240 | perror("scandir");
241 | return n;
242 | }
243 |
244 | for ( int i = 0; i < n; ++i )
245 | {
246 | addSeries( namelist[ i ]->d_name );
247 | free( namelist[ i ] );
248 | }
249 | free(namelist);
250 |
251 | /* printDictionary( dictionary ); */
252 |
253 | return 0;
254 | }
255 |
256 | void addSeasonEpisode( unsigned int season, unsigned int episode )
257 | {
258 | char temp[50];
259 |
260 | snprintf( temp, sizeof(temp), "%02u", season );
261 | addParam( gFileDict, kKeywordSeason, temp );
262 | if ( season == 0 || episode == 0 )
263 | {
264 | addParam( gFileDict, kKeywordSeasonFolder, "Specials" );
265 | }
266 | else
267 | {
268 | snprintf( temp, sizeof(temp), "Season %02u", season );
269 | addParam( gFileDict, kKeywordSeasonFolder, temp );
270 | }
271 |
272 | snprintf( temp, sizeof(temp), "%02u", episode );
273 | addParam( gFileDict, kKeywordEpisode, temp );
274 | }
275 |
276 | void storeSeries( string series )
277 | {
278 | string result = series;
279 | string ptr, end;
280 | tHash hash;
281 | unsigned char c;
282 |
283 | ptr = series;
284 | hash = 0;
285 |
286 | addParam( gFileDict, kKeywordSeries, series );
287 |
288 | // regenerate the hash incrementally, checking at each separator.
289 | // remember the longest match, i.e. keep looking until the end of the string
290 | do {
291 | c = kKeywordMap[ (unsigned char)*ptr ];
292 | switch ( c )
293 | {
294 | case kKeywordSeparator:
295 | case '\0':
296 | /* let's see if we have a match */
297 | debugf( 4, "checking: 0x%016lx\n", hash );
298 |
299 | string match = findValue( gSeriesDict, hash );
300 | if ( match != NULL)
301 | {
302 | result = match;
303 | debugf( 3, "matched %s\n", result );
304 | end = ptr;
305 | }
306 | break;
307 |
308 | case '&':
309 | hash = fPatternHashChar( hash, 'a' );
310 | hash = fPatternHashChar( hash, 'n' );
311 | hash = fPatternHashChar( hash, 'd' );
312 | break;
313 |
314 | default:
315 | hash = fPatternHashChar( hash, c );
316 | break;
317 | };
318 | ptr++;
319 | } while ( c != '\0' );
320 |
321 | if ( result != series )
322 | {
323 | if ( *end != '\0' )
324 | {
325 | /* if the run is longer than the match with the series name,
326 | then store the trailing remnant as the episode title */
327 | addParam( gFileDict, kKeywordTitle, (string) end + 1 );
328 | *(char *) end = '\0';
329 | }
330 | }
331 | addParam( gFileDict, kKeywordDestSeries, result );
332 | }
333 |
334 | int storeToken( tHash hash, string value )
335 | {
336 | unsigned int season = 0;
337 | unsigned int episode = 0;
338 | unsigned int year = 0;
339 | char temp[20];
340 | string seriesName;
341 |
342 | switch (hash)
343 | {
344 | case kPatternSnnEnn: // we found 'SnnEnn' or
345 | case kPatternSyyyyEnn: // SyyyyEnnn
346 | case kPatternSnnEn: // SnnEn
347 | case kPatternSnEnn: // SnEnn
348 | case kPatternSnEn: // SnEn
349 | debugf( 3,"SnnEnn: %s\n", value);
350 | sscanf( value, "%*1c%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case
351 | addSeasonEpisode( season, episode );
352 | break;
353 |
354 | case kPatternEnnn:
355 | debugf( 3,"Ennn: %s\n", value);
356 | sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case
357 | season = episode / 100;
358 | episode %= 100;
359 | addSeasonEpisode( season, episode );
360 | break;
361 |
362 | case kPatternEnnnn:
363 | debugf( 3,"Ennnn: %s\n", value);
364 | sscanf( value, "%*1c%u", &episode ); // ignore characters since we don't know their case
365 | unsigned int divisor = 100;
366 | /* see if there's a season number to extract */
367 | if ( ((episode / divisor) % 10) == 0 )
368 | {
369 | /* least significant digit of season is zero, so we can increase the divisor by 10 */
370 | divisor *= 10;
371 | }
372 | season = episode / divisor;
373 | episode %= divisor;
374 | addSeasonEpisode( season, episode );
375 | break;
376 |
377 | case kPatternnXnn:
378 | case kPatternnnXnn:
379 | debugf( 3, "nnXnn: %s\n", value);
380 | sscanf( value, "%u%*1c%u", &season, &episode ); // ignore characters since we don't know their case
381 | addSeasonEpisode( season, episode );
382 | break;
383 |
384 | case kPatternYear:
385 | sscanf( value, "%*1c%u%*1c", &year ); // ignore characters since we don't know their case
386 | if ( 1890 < year && year <= gNextYear )
387 | {
388 | snprintf( temp, sizeof( temp ), "%u", year );
389 | addParam( gFileDict, kKeywordYear, temp );
390 | }
391 | debugf( 3, "year: %u\n", year );
392 | break;
393 |
394 | case kPatternCountryUSA:
395 | addParam( gFileDict, kKeywordCountry, "USA" );
396 | break;
397 |
398 | case kPatternCountryUS:
399 | addParam( gFileDict, kKeywordCountry, "US" );
400 | break;
401 |
402 | case kPatternCountryUK:
403 | addParam( gFileDict, kKeywordCountry, "UK" );
404 | break;
405 |
406 | case kPatternNoMatch:
407 | seriesName = findParam( kKeywordSeries );
408 | if ( seriesName == NULL )
409 | {
410 | debugf( 3, "series: %s\n", value );
411 | storeSeries( value );
412 | }
413 | else
414 | {
415 | debugf( 3, "title: %s\n", value );
416 | addParam( gFileDict, kKeywordTitle, value );
417 | }
418 | break;
419 |
420 | // kPatternTwoDigits:
421 | // kPatternFourDigits:
422 | // kPatternSixDigits:
423 | // kPatternEightDigits:
424 | default:
425 | break;
426 | }
427 | return 0;
428 | }
429 |
430 | tHash checkHash( tHash hash)
431 | {
432 | switch (hash)
433 | {
434 | case kPatternSnnEnn: // SnnEnn
435 | case kPatternSyyyyEnn: // SnnnnEnn
436 | case kPatternSnnEn: // SnnEn
437 | case kPatternSnEnn: // SnEnn
438 | case kPatternSnEn: // SnEn
439 | case kPatternEnnn: // Ennn
440 | case kPatternEnnnn: // Ennnn
441 | case kPatternnXnn: // nXnn
442 | case kPatternnnXnn: // nnXnn
443 | case kPatternTwoDigits: // nn
444 | case kPatternFourDigits: // nnnn
445 | case kPatternSixDigits: // nnnnnn
446 | case kPatternEightDigits: // nnnnnnnn
447 | case kPatternCountryUSA: // (USA)
448 | case kPatternCountryUS: // (US)
449 | case kPatternCountryUK: // (UK)
450 | case kPatternYear: // (nnnn)
451 | break;
452 |
453 | default:
454 | hash = kPatternNoMatch;
455 | break;
456 | }
457 | return hash;
458 | }
459 |
460 | void tokenizeName( string originalName )
461 | {
462 | gTokenList.next = NULL;
463 |
464 | string name = strdup( originalName ); // copy it, because we'll terminate strings in place as we go
465 |
466 | if ( name != NULL)
467 | {
468 | unsigned char c;
469 |
470 | string start = name;
471 | string ptr = start;
472 | tHash hash = 0;
473 |
474 | tToken * token = &gTokenList;
475 |
476 | do {
477 | c = kPatternMap[ *(unsigned char *)ptr ];
478 | switch ( c )
479 | {
480 | case kPatternSeperator:
481 | case '\0':
482 | // reached the end of a token
483 | token->next = calloc( 1, sizeof(tToken) );
484 | token = token->next;
485 | if ( token != NULL )
486 | {
487 | token->hash = checkHash( hash );
488 | token->start = start;
489 | token->end = ptr;
490 | token->seperator = *ptr;
491 | *(char *)ptr = '\0';
492 | }
493 | // only prepare for the next run if we're not at the end of the string
494 | if ( c != '\0' )
495 | {
496 | // skip over a run of kPatternSeperator, if present (e.g. ' - ')
497 | do { ptr++; } while ( kPatternMap[ *(unsigned char *)ptr ] == kPatternSeperator );
498 | start = ptr;
499 | hash = 0;
500 | }
501 | break;
502 |
503 | case '&':
504 | hash = fPatternHashChar( hash, 'a' );
505 | hash = fPatternHashChar( hash, 'n' );
506 | hash = fPatternHashChar( hash, 'd' );
507 | ptr++;
508 | break;
509 |
510 | default:
511 | hash = fPatternHashChar( hash, c );
512 | ptr++;
513 | break;
514 | };
515 | } while ( c != '\0' );
516 |
517 | token = gTokenList.next;
518 | while ( token != NULL )
519 | {
520 | debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator );
521 | token = token->next;
522 | }
523 | }
524 | }
525 |
526 | void freeTokenList( void )
527 | {
528 | tToken *nextToken;
529 | tToken * token = gTokenList.next;
530 | gTokenList.next = NULL;
531 | while ( token != NULL )
532 | {
533 | nextToken = token->next;
534 | free( token );
535 | token = nextToken;
536 | }
537 | }
538 |
539 | /*
540 | * Channels DVR:
541 | * air date: yyyy-mm-dd
542 | * recorded: yyyy-mm-dd-hhss
543 | * TVMosaic
544 | * recorded: hhss-yyyymmdd
545 | *
546 | */
547 | void mergeDigits( void )
548 | {
549 | tToken * token[4];
550 |
551 | token[0] = gTokenList.next;
552 |
553 | while ( token[0] != NULL)
554 | {
555 | token[1] = token[0]->next;
556 | switch ( token[0]->hash )
557 | {
558 | // Channels DVR: YYYY-mm-dd
559 | // YYYY-mm-dd-hhss
560 | // TVMosaic: HHSS-yyyymmdd
561 | case kPatternFourDigits:
562 | if ( token[1] != NULL)
563 | {
564 | token[2] = token[1]->next;
565 |
566 | switch ( token[1]->hash )
567 | {
568 | // Channels DVR: YYYY-MM-dd
569 | // YYYY-MM-dd-hhss
570 | case kPatternTwoDigits:
571 | if ( token[1]->seperator == '-' && token[2] != NULL)
572 | {
573 | switch ( token[2]->hash )
574 | {
575 | // Channels DVR: YYYY-MM-DD
576 | // YYYY-MM-DD-hhss
577 | case kPatternTwoDigits:
578 | token[3] = token[2]->next;
579 | if (token[3] != NULL)
580 | {
581 | switch ( token[3]->hash )
582 | {
583 | case kPatternFourDigits:
584 | // ok, looks like we have YYYY-MM-DD-HHSS
585 | token[0]->next = token[3]->next;
586 | *(char *) token[0]->end = '-';
587 | *(char *) token[1]->end = '-';
588 | *(char *) token[2]->end = '-';
589 | token[0]->end = token[3]->end;
590 | token[0]->hash = kKeywordDateRecorded;
591 | free( token[1] );
592 | free( token[2] );
593 | free( token[3] );
594 | break;
595 |
596 | default:
597 | // ok, looks like we have YYYY-MM-DD
598 | token[0]->next = token[2]->next;
599 | *(char *) token[0]->end = '-';
600 | *(char *) token[1]->end = '-';
601 | token[0]->end = token[2]->end;
602 | token[0]->hash = kKeywordFirstAired;
603 | free( token[1] );
604 | free( token[2] );
605 | break;
606 | }
607 | }
608 | break;
609 |
610 | default:
611 | break;
612 | }
613 | }
614 | break;
615 |
616 | // TVMosaic: HHSS-YYYYMMDD
617 | case kPatternEightDigits:
618 | token[0]->next = token[1]->next;
619 | *(char *) token[0]->end = '-';
620 | token[0]->end = token[1]->end;
621 | token[0]->hash = kKeywordDateRecorded;
622 | free( token[1] );
623 | break;
624 |
625 | default:
626 | token[0]->hash = kPatternNoMatch;
627 | break;
628 | }
629 | }
630 | else
631 | {
632 | // last token, therefore four trailing digits, no metapattern
633 | token[0]->hash = kPatternNoMatch;
634 | }
635 | break;
636 |
637 | default:
638 | // not kPatternFourDigits, ignore it.
639 | break;
640 | }
641 | token[0] = token[0]->next;
642 | }
643 | }
644 |
645 | void mergeNoMatch( void )
646 | {
647 | tToken * token;
648 | tToken * nextToken;
649 |
650 | token = gTokenList.next;
651 | while ( token != NULL)
652 | {
653 | nextToken = token->next;
654 | if ( nextToken != NULL && token->hash == kPatternNoMatch && nextToken->hash == kPatternNoMatch )
655 | {
656 | // combine the two kPatternNoMatch tokens
657 | token->next = nextToken->next;
658 | *(char *)token->end = ' ';
659 | token->end = nextToken->end;
660 | free( nextToken );
661 | }
662 | else
663 | {
664 | token = token->next;
665 | }
666 | }
667 |
668 | /* Some tokens should also be appended as a suffix, while also retaining the token */
669 | token = gTokenList.next;
670 | while ( token != NULL)
671 | {
672 | nextToken = token->next;
673 | if ( token->hash == kPatternNoMatch && nextToken != NULL )
674 | {
675 | switch ( nextToken->hash )
676 | {
677 | /* The tokens we treat as suffixes */
678 | case kPatternCountryUK:
679 | case kPatternCountryUS:
680 | case kPatternCountryUSA:
681 | case kPatternYear:
682 | /* extend the kPatternNoMatch token to include the suffix */
683 | *(char *)token->end = ' ';
684 | token->end = nextToken->end;
685 | break;
686 |
687 | default:
688 | break;
689 | }
690 | }
691 | token = token->next;
692 | }
693 | }
694 |
695 | int parseName( string name )
696 | {
697 | tToken * token = gTokenList.next;
698 |
699 | tokenizeName( name );
700 |
701 | mergeDigits();
702 | mergeNoMatch();
703 |
704 | debugf( 4, "%s\n", "after merging" );
705 | token = gTokenList.next;
706 | while ( token != NULL)
707 | {
708 | debugf( 4, "token: \'%s\', \'%s\' (%c)\n", lookupHash( token->hash ), token->start, token->seperator );
709 |
710 | storeToken( token->hash, token->start );
711 | token = token->next;
712 | }
713 |
714 | freeTokenList();
715 |
716 | return 0;
717 | }
718 |
719 |
720 | /*
721 | * carve up the path into directory path, basename and extension
722 | * then pass basename onto parseName() to be processed
723 | */
724 | int parsePath( string path )
725 | {
726 | int result = 0;
727 |
728 | addParam( gFileDict, kKeywordSource, path );
729 |
730 | string lastPeriod = strrchr( path, '.' );
731 | string lastChar = path + strlen(path);
732 | if ( lastPeriod != NULL && (lastChar - lastPeriod) < 5 )
733 | {
734 | addParam( gFileDict, kKeywordExtension, lastPeriod );
735 | }
736 | else
737 | {
738 | lastPeriod = lastChar;
739 | }
740 |
741 | string lastSlash = strrchr( path, '/' );
742 | if ( lastSlash != NULL )
743 | {
744 | string p = strndup( path, lastSlash - path );
745 | addParam( gFileDict, kKeywordPath, p );
746 | free( (void *)p );
747 |
748 | ++lastSlash;
749 | }
750 | else
751 | {
752 | lastSlash = path; // no directories prefixed
753 | }
754 |
755 | string basename = strndup( lastSlash, lastPeriod - lastSlash );
756 | addParam( gFileDict, kKeywordBasename, basename );
757 | parseName( basename );
758 | free( (void *)basename );
759 |
760 | return result;
761 | }
762 |
763 | string buildString( string template )
764 | {
765 | string result = NULL;
766 | string t = template;
767 | char * s; // pointer into the returned string
768 |
769 | result = calloc( 1, 32768 );
770 | s = (char *)result;
771 |
772 | if ( s != NULL )
773 | {
774 | unsigned char c = *t++; // unsigned because it is used as an array subscript when calculating the hash
775 | while ( c != '\0' )
776 | {
777 | unsigned long hash;
778 | string k;
779 |
780 | switch (c)
781 | {
782 | case '{': // start of keyword
783 | k = t; // remember where the keyword starts
784 |
785 | // scan the keyword and generate its hash
786 | hash = 0;
787 |
788 | c = *t++;
789 | while ( c != '\0' && c != '}' && c != '?' )
790 | {
791 | if ( kKeywordMap[ c ] != kKeywordSeparator ) /* we ignore some characters when calculating the hash */
792 | {
793 | hash = fKeywordHashChar( hash, c );
794 | }
795 | c = *t++;
796 | }
797 |
798 | if ( hash != kKeywordTemplate ) // don't want to expand a {template} keyword in a template!
799 | {
800 | string value = findParam( hash );
801 |
802 | if ( value == NULL ) // not in the dictionaries, check for an environment variable
803 | {
804 | string envkey = strndup( k, t - k - 1 );
805 | value = getenv( envkey );
806 | if ( value != NULL )
807 | {
808 | debugf( 3, "env=\"%s\", value=\"%s\"\n", envkey, value );
809 | }
810 | free( (void *)envkey );
811 | }
812 |
813 | if ( c != '?' )
814 | {
815 | // end of keyword, and not the beginning of a ternary expression
816 | if ( value != NULL )
817 | {
818 | s = stpcpy( s, value );
819 | }
820 | }
821 | else
822 | { // ternary operator, like {param?true:false} (true or false can be absent)
823 |
824 | c = *t++;
825 |
826 | if ( value != NULL )
827 | {
828 | // copy the 'true' clause
829 | while ( c != '}' && c != ':' && c != '\0' )
830 | {
831 | if ( c != '@' )
832 | {
833 | *s++ = c;
834 | }
835 | else
836 | {
837 | s = stpcpy( s, value );
838 | }
839 |
840 | c = *t++;
841 | }
842 |
843 | if ( c == ':' )
844 | {
845 | // skip over the 'false' clause
846 | while ( c != '\0' && c != '}' )
847 | {
848 | c = *t++;
849 | }
850 | }
851 | }
852 | else // if undefined, skip over 'true' pattern, find the ':' (or trailing '}')
853 | {
854 | // value is undefined, so skip ahead to the false clause (or keyword end)
855 | while ( c != ':' && c != '}' && c != '\0' )
856 | {
857 | c = *t++;
858 | }
859 |
860 | if ( c == ':' ) // did we find the 'false' clause?
861 | {
862 | c = *t++; // yep, so swallow the colon
863 | // copy the 'false' clause into the string
864 | // no '@' processing, as the parameter is not defined
865 | while ( c != '\0' && c != '}' )
866 | {
867 | *s++ = c;
868 | c = *t++;
869 | }
870 | }
871 | }
872 | }
873 | } // if !{template}
874 | break;
875 |
876 | case '\\': // next template character is escaped, not interpreted, e.g. \{
877 | c = *t++;
878 | *s++ = c;
879 | break;
880 |
881 | default:
882 | *s++ = c;
883 | break;
884 | } // switch
885 |
886 | c = *t++;
887 | }
888 |
889 | *s = '\0'; // always terminate the string
890 | }
891 | return result;
892 | }
893 |
894 | int parseConfigFile( tDictionary * dictionary, string path )
895 | {
896 | int result = 0;
897 | FILE * file;
898 | char buffer[ 4096 ]; // 4K seems like plenty
899 |
900 | if ( eaccess( path, R_OK ) != 0 ) // only attempt to parse it if there's something accessible there
901 | {
902 | // it's OK if the file is missing, otherwise complain
903 | if ( errno != ENOENT )
904 | {
905 | fprintf( stderr,
906 | "### Error: Unable to access config file \'%s\' (%d: %s)",
907 | path, errno, strerror(errno));
908 | result = errno;
909 | }
910 | }
911 | else
912 | {
913 | debugf( 3, "config file: \'%s\'\n", path );
914 |
915 | file = fopen(path, "r");
916 | if (file == NULL)
917 | {
918 | fprintf( stderr, "### Error: Unable to open config file \'%s\' (%d: %s)\n",
919 | path, errno, strerror(errno) );
920 | result = errno;
921 | }
922 | else
923 | {
924 | while ( fgets( buffer, sizeof( buffer ), file) != NULL )
925 | {
926 | trimTrailingWhitespace( buffer );
927 | debugf( 4,"line: \'%s\'\n", buffer );
928 |
929 | tHash hash = 0;
930 | string s = buffer;
931 | while (isspace(*s)) {
932 | s++;
933 | }
934 |
935 | unsigned char c = (unsigned char) *s++;
936 | if (c != '\0') {
937 | while (c != '\0' && c != '=') {
938 | if ( c != kKeywordSeparator ) {
939 | hash = fKeywordHashChar( hash, c );
940 | }
941 | c = (unsigned char) *s++;
942 | }
943 |
944 | if (c == '=') {
945 | // skip over whitespace from the beginning of the value
946 | while ( isspace(*s) ) {
947 | s++;
948 | }
949 | trimTrailingWhitespace( (char *)s );
950 | }
951 | debugf( 4,"hash = 0x%016lx, value = \'%s\'\n", hash, s);
952 | addParam( dictionary, hash, s );
953 | }
954 | }
955 | fclose(file);
956 | }
957 | }
958 |
959 | return result;
960 | }
961 |
962 | /**
963 | * @brief Look for config files to process, and use them to update the main dictionary.
964 | *
965 | * First, look in /etc/.conf then in ~/.config/.conf, and finally the file
966 | * passed as a -c parameter, if any, then any parameters on the command line (except -c)
967 | * Where a parameter occurs more than once in a dictionary, the most recent definition 'wins'
968 | */
969 |
970 | int parseConfig( string path )
971 | {
972 | int result = 0;
973 | char temp[PATH_MAX];
974 |
975 | snprintf( temp, sizeof( temp ), "/etc/%s.conf", gMyName );
976 | debugf( 4, "/etc path: \"%s\"\n", temp );
977 |
978 | result = parseConfigFile( gMainDict, temp );
979 |
980 | if ( result == 0 )
981 | {
982 | string home = getenv("HOME");
983 | if ( home == NULL)
984 | {
985 | home = getpwuid( getuid() )->pw_dir;
986 | }
987 | if ( home != NULL )
988 | {
989 | snprintf( temp, sizeof( temp ), "%s/.config/%s.conf", home, gMyName );
990 | debugf( 4, "~ path: \"%s\"\n", temp );
991 |
992 | result = parseConfigFile( gMainDict, temp );
993 | }
994 | }
995 |
996 | if ( result == 0 && path != NULL )
997 | {
998 | struct stat fileStat;
999 |
1000 | if ( stat( path, &fileStat ) != 0 )
1001 | {
1002 | fprintf( stderr, "### Error: config path '%s' is not valid (%d: %s)\n",
1003 | path, errno, strerror(errno) );
1004 | result = -1;
1005 | }
1006 | switch ( fileStat.st_mode & S_IFMT )
1007 | {
1008 | case S_IFDIR:
1009 | snprintf( temp, sizeof( temp ), "%s/%s.conf", path, gMyName );
1010 | break;
1011 |
1012 | case S_IFLNK:
1013 | case S_IFREG:
1014 | strncpy( temp, path, sizeof( temp ) );
1015 | break;
1016 |
1017 | default:
1018 | fprintf( stderr, "### Error: config path '%s' is neither a file nor directory.\n", path );
1019 | result = -1;
1020 | break;
1021 | }
1022 |
1023 | if ( result == 0 )
1024 | {
1025 | debugf( 4, "-c path: %s\n", temp );
1026 | result = parseConfigFile( gMainDict, temp );
1027 | }
1028 | }
1029 |
1030 | return result;
1031 | }
1032 |
1033 | /**
1034 | * @brief recurive function to walk the path looking for config files
1035 | * @param gFileDict
1036 | * @param path
1037 | */
1038 | void _recurseConfig( tDictionary * dictionary, string path )
1039 | {
1040 | char temp[PATH_MAX];
1041 |
1042 | if ( strlen(path) != 1 || (path[0] != '/' && path[0] != '.'))
1043 | {
1044 | strncpy( temp, path, sizeof( temp ) );
1045 | _recurseConfig( dictionary, dirname( temp ) );
1046 | /* check for a config file & if found, parse it */
1047 | debugf( 4, "recurse = \'%s\'\n", path );
1048 | snprintf( temp, sizeof(temp), "%s/%s.conf", path, gMyName );
1049 | parseConfigFile( dictionary, temp );
1050 | }
1051 | }
1052 |
1053 | /**
1054 | Traverse the path to the source file, looking for config files.
1055 | Apply them in the reverse order, so ones lower in the hierarchy
1056 | can override parameters defined in higher ones.
1057 | */
1058 | int processConfigPath( string path )
1059 | {
1060 | int result = 0;
1061 | char temp[PATH_MAX];
1062 | char * absolute;
1063 |
1064 | /* dirname may modify its argument, so make a copy first */
1065 | strncpy( temp, path, sizeof(temp) );
1066 | absolute = realpath( dirname(temp), NULL );
1067 | if ( absolute == NULL )
1068 | {
1069 | fprintf( stderr, "### Error: path \'%s\' appears to be invalid (%d: %s).\n",
1070 | path, errno, strerror(errno) );
1071 | return -5;
1072 | }
1073 | else
1074 | {
1075 | debugf( 3, "abs = %s, cached = %s\n", absolute, gCachedPath );
1076 | if ( gCachedPath == NULL || strcmp( gCachedPath, absolute ) != 0 )
1077 | {
1078 | debugf( 3, "absolute = \'%s\'\n", absolute );
1079 | emptyDictionary( gPathDict );
1080 | gCachedPath = absolute;
1081 | _recurseConfig( gPathDict, absolute );
1082 | }
1083 |
1084 | /* we may have picked up a new definition of {destination} as
1085 | * a result of parsing different config files. If so, we need
1086 | * to rebuild gSeriesDict to reflect the new destination */
1087 |
1088 | string destination = findParam( kKeywordDestination );
1089 |
1090 | if ( destination == NULL)
1091 | {
1092 | fprintf( stderr, "### Error: no destination defined.\n" );
1093 | result = -3;
1094 | }
1095 | else
1096 | {
1097 | if ( gCachedSeries == NULL || strcmp( gCachedSeries, destination ) != 0 )
1098 | {
1099 | debugf( 2, "destination = \'%s\'\n", destination );
1100 | // fill the dictionary with hashes of the directory names in the destination
1101 | emptyDictionary( gSeriesDict );
1102 | gCachedSeries = destination;
1103 | buildSeriesDictionary( destination );
1104 | }
1105 | }
1106 | }
1107 | return result;
1108 | }
1109 |
1110 | int processFile( string path )
1111 | {
1112 | int result = 0;
1113 |
1114 | processConfigPath( path );
1115 |
1116 | parsePath( path );
1117 |
1118 | printDictionary( gFileDict );
1119 |
1120 | string template = findParam( kKeywordTemplate );
1121 |
1122 | if ( template == NULL)
1123 | {
1124 | fprintf( stderr, "### Error: no template found.\n" );
1125 | result = -2;
1126 | }
1127 | else
1128 | {
1129 | debugf( 2, "template = \'%s\'\n", template );
1130 |
1131 | string output = buildString( template );
1132 | string exec = findParam( kKeywordExecute );
1133 | if ( exec != NULL)
1134 | {
1135 | result = system( output );
1136 | }
1137 | else
1138 | {
1139 | printf( "%s\n", output );
1140 | }
1141 | free( (void *)output );
1142 | }
1143 | emptyDictionary( gFileDict );
1144 | return result;
1145 | }
1146 |
1147 | string usage =
1148 | "Command Line Options\n"
1149 | " -d set {destination} parameter\n"
1150 | " -t set {template} paameter\n"
1151 | " -x pass each output string to the shell to execute\n"
1152 | " -- read from stdin\n"
1153 | " -0 stdin is null-terminated (also implies '--' option)\n"
1154 | " -v set the level of verbosity (debug info)\n";
1155 |
1156 |
1157 | int main( int argc, string argv[] )
1158 | {
1159 | int result;
1160 | int cnt;
1161 | string configPath = NULL;
1162 | time_t secsSinceEpoch;
1163 | struct tm *timeStruct;
1164 |
1165 | gMainDict = createDictionary( "Main" );
1166 | gSeriesDict = createDictionary( "Series" );
1167 | gPathDict = createDictionary( "Path" );
1168 | gFileDict = createDictionary( "File" );
1169 |
1170 | gMyName = basename( strdup( argv[0] ) ); // posix flavor of basename modifies its argument
1171 |
1172 | secsSinceEpoch = time( NULL );
1173 | timeStruct = localtime( &secsSinceEpoch );
1174 | if ( timeStruct != NULL )
1175 | {
1176 | gNextYear = timeStruct->tm_year + 1900 + 1;
1177 | }
1178 |
1179 | int k = 1;
1180 | cnt = argc;
1181 | for ( int i = 1; i < argc; i++ )
1182 | {
1183 | debugf( 4, "a: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[ i ] );
1184 |
1185 | // is it the config file option?
1186 | if ( strcmp( argv[ i ], "-c" ) == 0 )
1187 | {
1188 | cnt -= 2;
1189 | ++i;
1190 | configPath = strdup( argv[ i ] ); // make a copy - argv will be modified
1191 | }
1192 | else
1193 | {
1194 | if ( i != k )
1195 | {
1196 | argv[ k ] = argv[ i ];
1197 | }
1198 | ++k;
1199 | }
1200 | }
1201 | argc = cnt;
1202 |
1203 | result = parseConfig( configPath );
1204 |
1205 | if ( configPath != NULL )
1206 | {
1207 | free( (void *)configPath );
1208 | configPath = NULL;
1209 | }
1210 |
1211 | k = 1;
1212 | for ( int i = 1; i < argc && result == 0; i++ )
1213 | {
1214 | debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] );
1215 |
1216 | // is it an option?
1217 | if (argv[i][0] == '-' )
1218 | {
1219 | char option = argv[i][1];
1220 | if ( argv[i][2] != '\0' )
1221 | {
1222 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] );
1223 | fprintf( stderr, "%s", usage );
1224 | result = -1;
1225 | }
1226 | else
1227 | {
1228 | --cnt;
1229 |
1230 | switch ( option )
1231 | {
1232 | // case 'c': // config file already handled
1233 | // break;
1234 |
1235 | case 'd': // destination
1236 | addParam( gMainDict, kKeywordDestination, argv[ i ] );
1237 | --cnt;
1238 | ++i;
1239 | break;
1240 |
1241 | case 't': // template
1242 | addParam( gMainDict, kKeywordTemplate, argv[ i ] );
1243 | --cnt;
1244 | ++i;
1245 | break;
1246 |
1247 | case 'x': // execute
1248 | addParam( gMainDict, kKeywordExecute, "yes" );
1249 | break;
1250 |
1251 | case '-': // also read lines from stdin
1252 | addParam( gMainDict, kKeywordStdin, "yes" );
1253 | break;
1254 |
1255 | case '0': // entries from stdio are terminated with NULLs
1256 | addParam( gMainDict, kKeywordStdin, "yes" );
1257 | addParam( gMainDict, kKeywordNullTermination, "yes" );
1258 | break;
1259 |
1260 | case 'v': // verbose output, i.e. show debug logging
1261 | if ( i < argc - 1 )
1262 | {
1263 | ++i;
1264 | --cnt;
1265 |
1266 | gDebugLevel = atoi( argv[i] );
1267 | fprintf(stderr, "verbosity = %d\n", gDebugLevel );
1268 | }
1269 | break;
1270 |
1271 | default:
1272 | ++cnt;
1273 | --i; // point back at the original option
1274 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[ i ] );
1275 | fprintf( stderr, "%s", usage );
1276 | result = -1;
1277 | break;
1278 | }
1279 | }
1280 | }
1281 | else
1282 | {
1283 | if ( i != k )
1284 | {
1285 | argv[k] = argv[i];
1286 | }
1287 | ++k;
1288 | }
1289 | }
1290 | argc = cnt;
1291 |
1292 | /* printDictionary( mainDict ); */
1293 |
1294 | for ( int i = 1; i < argc; i++ )
1295 | {
1296 | debugf( 4, "b: i = %d, k = %d, cnt = %d, \'%s\'\n", i, k, cnt, argv[i] );
1297 |
1298 | // is it an option?
1299 | if ( argv[i][0] == '-' )
1300 | {
1301 | char option = argv[i][1];
1302 | if ( argv[i][2] != '\0' )
1303 | {
1304 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] );
1305 | result = -1;
1306 | }
1307 | else
1308 | {
1309 | --cnt;
1310 |
1311 | switch ( option )
1312 | {
1313 | case 'd': // destination
1314 | addParam( gMainDict, kKeywordDestination, argv[i] );
1315 | --cnt;
1316 | ++i;
1317 | break;
1318 |
1319 | case 't': // template
1320 | addParam( gMainDict, kKeywordTemplate, argv[i] );
1321 | --cnt;
1322 | ++i;
1323 | break;
1324 |
1325 | case 'x': // execute
1326 | addParam( gMainDict, kKeywordExecute, "yes" );
1327 | break;
1328 |
1329 | case '-': // also read lines from stdin
1330 | addParam( gMainDict, kKeywordStdin, "yes" );
1331 | break;
1332 |
1333 | case '0': // entries from stdio are terminated with NULLs
1334 | addParam( gMainDict, kKeywordNullTermination, "yes" );
1335 | break;
1336 |
1337 | case 'v': //verbose output, i.e. debug logging
1338 | if ( i < argc - 1 )
1339 | {
1340 | ++i;
1341 | --cnt;
1342 |
1343 | gDebugLevel = atoi( argv[i] );
1344 | fprintf( stderr, "verbosity = %d\n", gDebugLevel );
1345 | }
1346 | break;
1347 |
1348 | default:
1349 | ++cnt;
1350 | --i; // point back at the original option
1351 | fprintf( stderr, "### Error: option \'%s\' not understood.\n", argv[i] );
1352 | result = -1;
1353 | break;
1354 | }
1355 | }
1356 | }
1357 | else
1358 | {
1359 | if ( i != k )
1360 | {
1361 | argv[k] = argv[i];
1362 | }
1363 | ++k;
1364 | }
1365 | }
1366 | argc = cnt;
1367 |
1368 | printDictionary( gMainDict );
1369 |
1370 | for ( int i = 1; i < argc && result == 0; ++i )
1371 | {
1372 | debugf( 4, "%d: \'%s\'\n", i, argv[ i ] );
1373 | processFile( argv[i] );
1374 | }
1375 |
1376 | // should we also read from stdin?
1377 | if ( findParam( kKeywordStdin ) != NULL )
1378 | {
1379 | char line[PATH_MAX];
1380 |
1381 | if ( findParam( kKeywordNullTermination ) != NULL )
1382 | {
1383 | // ...therefore lines are terminated by \0
1384 | char * p = line;
1385 | cnt = sizeof( line );
1386 |
1387 | while (!feof(stdin))
1388 | {
1389 | char c = fgetc( stdin );
1390 | *p++ = c;
1391 | cnt--;
1392 |
1393 | if ( c == '\0' || cnt < 1 )
1394 | {
1395 | debugf( 4, "null: %s\n", line );
1396 | processFile( line );
1397 |
1398 | p = line;
1399 | cnt = sizeof( line );
1400 | }
1401 | }
1402 | }
1403 | else
1404 | {
1405 | while (!feof(stdin))
1406 | {
1407 | // ...otherwise lines are terminated by \n
1408 | fgets( line, sizeof(line), stdin );
1409 |
1410 | // lop off the inevitable trailing newline(s)/whitespace
1411 | trimTrailingWhitespace( line );
1412 | debugf( 4,"eol: %s\n", line);
1413 | processFile( line);
1414 | }
1415 | }
1416 | }
1417 |
1418 | // all done, clean up.
1419 | destroyDictionary( gFileDict );
1420 | destroyDictionary( gPathDict );
1421 | destroyDictionary( gSeriesDict );
1422 | destroyDictionary( gMainDict );
1423 |
1424 | return result;
1425 | }
1426 |
--------------------------------------------------------------------------------
/dvr2plex.h:
--------------------------------------------------------------------------------
//
// Created by Paul on 4/4/2019.
//

#ifndef DVR2PLEX_H
#define DVR2PLEX_H

/*
 * Define DEBUG for non-release builds.
 *
 * This used to read '#if CMAKE_BUILD_TYPE == Debug'. Neither identifier is defined as
 * a macro when the compiler runs (CMakeLists.txt does not pass them on), and the
 * preprocessor treats undefined identifiers in #if as 0, so that test was '0 == 0' and
 * always true. NDEBUG is the conventional marker of a release build, so key off that.
 */
#if !defined( DEBUG ) && !defined( NDEBUG )
#define DEBUG 1
#endif

/* read-only C string, used throughout the project */
typedef const char * string;

/* messages with a level above gDebugLevel are suppressed */
extern int gDebugLevel;

/*
 * printf-style debug logging to stderr.
 * 'level' is parenthesized so that an expression can safely be passed as the level.
 * At least one argument must follow 'format'.
 */
#define debugf( level, format, ... ) do { if (gDebugLevel >= (level)) fprintf( stderr, format, __VA_ARGS__ ); } while (0)

#endif // DVR2PLEX_H
18 |
--------------------------------------------------------------------------------
/keywords.hash:
--------------------------------------------------------------------------------
1 | //
2 | // Created by paul on 1/14/20.
3 | //
4 |
5 | prefix = "Keyword"
6 |
7 | # the character mapping array
8 | # array starts out mapping one-to-one, i.e. input = output
9 | #
10 | mappings:
11 | {
12 | # make the generated hashes case-insensitive
13 | ignoreCase = true
14 |
15 | # mark these characters as 'kKeywordSeparator'
16 | Separator = " ._-"
17 |
18 | Ignored = "?!"
19 |
20 | # mark the different styles of brackets as equivalent
21 | LBracket = "({["
22 | RBracket = ")}]"
23 | }
24 |
25 | # the strings to hash into the enum
26 | # if there's a comma, first string is for symbol, second is to hash
27 | #
28 | keywords = [
29 | "Basename",
30 | "Country",
31 | "DateRecorded",
32 | "DestSeries",
33 | "Destination",
34 | "Episode",
35 | "Execute",
36 | "Extension",
37 | "FirstAired",
38 | "NullTermination",
39 | "Path",
40 | "Season",
41 | "SeasonFolder",
42 | "Series",
43 | "Source",
44 | "Stdin",
45 | "Template",
46 | "Title",
47 | "Year"
48 | ]
--------------------------------------------------------------------------------
/patterns.hash:
--------------------------------------------------------------------------------
1 | //
2 | // Created by paul on 1/14/20.
3 | //
4 |
5 | prefix = "Pattern"
6 |
7 | # the character mapping array
8 | # array starts out mapping one-to-one, i.e. input = output
9 | #
10 | mappings:
11 | {
12 | # make the generated hashes case-insensitive
13 | ignoreCase = true
14 |
15 | # mark these characters as 'kPatternSeperator'
16 | Seperator = " ._-"
17 |
18 | # mark this range as 'kPatternDigit'
19 | Digit = "0-9"
20 |
21 | # mark the different styles of brackets as equivalent
22 | LBracket = "({["
23 | RBracket = ")}]"
24 | }
25 |
26 | # the strings to hash into the enum
27 | # if there's a comma, first string is for symbol, second is to hash
28 | #
29 | keywords = [
30 | "SnnEnn,S00E00",
31 | "SyyyyEnn,S0000E00",
32 | "SnnEn,S00E0",
33 | "SnEnn,S0E00",
34 | "SnEn,S0E0",
35 | "Ennn,E000",
36 | "Ennnn,E0000",
37 | "nXnn,0x00",
38 | "nnXnn,00x00",
39 | "TwoDigits,00",
40 | "FourDigits,0000",
41 | "SixDigits,000000",
42 | "EightDigits,00000000",
43 | "CountryUSA,(USA)",
44 | "CountryUS,(US)",
45 | "CountryUK,(UK)",
46 | "Year,(0000)"
47 | ]
48 |
--------------------------------------------------------------------------------