├── README.md ├── smartrename.applescript └── smartrename.pl /README.md: -------------------------------------------------------------------------------- 1 | # devonthink-smart-rename 2 | Perl/Apple Script to rename DEVONthink 3 documents based on name, content and date. Watch out for Perl dependencies and local deployment for DEVONthink 3. If you have a better way to deploy it other than hardcoding the path to the perl script, let me know. 3 | -------------------------------------------------------------------------------- /smartrename.applescript: -------------------------------------------------------------------------------- 1 | use AppleScript version "2.4" -- Yosemite (10.10) or later 2 | use scripting additions 3 | use framework "Foundation" 4 | 5 | property pTitle : "DEVONthink Smart Rename" 6 | 7 | on run 8 | tell application "Finder" 9 | set current_path to container of (path to me) as alias 10 | set current_path to "/Users/You/PathToPerlScriptFolder/" 11 | set myScriptPath to POSIX path of current_path & "smartrename.pl" 12 | -- log current_path 13 | -- log myScriptPath 14 | end tell 15 | 16 | tell application id "DNtp" 17 | -- display dialog "Hello" 18 | if not (exists think window 1) then error "No window is open." 19 | -- if not (exists content record) then error "Please open exactly one document." 20 | 21 | set selectedText to selected text of think window 1 as string 22 | -- log selectedText 23 | 24 | -- set today to do shell script "date +'%Y-%m-%d'" 25 | 26 | try 27 | set selectedItems to selection 28 | 29 | repeat with selectedItem in selectedItems 30 | if class of selectedItem = record then 31 | 32 | set documentName to name of selectedItem as Unicode text 33 | -- log documentName 34 | set _creationDate to creation date of selectedItem 35 | -- log _creationDate 36 | set creationDate to my formatDate(_creationDate) 37 | -- log _creationDate 38 | 39 | set scriptCommand to ("perl " & quoted form of (myScriptPath) & space & quoted form of documentName & space & quoted form of creationDate & space & quoted form of selectedText) as Unicode text 40 | -- set scriptCommand to ("perl " & quoted form of (myScriptPath) & space & quoted form of "„Hallö ß ü — -" & space & quoted form of creationDate) 41 | -- log scriptCommand 42 | 43 | set newName to (do shell script scriptCommand) 44 | -- set test to do shell script "echo " & quoted form of "Hallö" & " | sed -E 's/" & "ö" & "/" & "o" & "/g'" 45 | -- set test to do shell script "echo " & "„Hallö ß ü — -" & "| perl -e ' print @ARGV[0,1]; while () { print } '" 46 | 47 | -- log test 48 | -- display alert newName 49 | 50 | set name of selectedItem to newName 51 | 52 | else 53 | -- log "Not a record" 54 | end if 55 | end repeat 56 | 57 | on error error_message number error_number 58 | -- if error_number is not -128 then display alert "DEVONthink" message error_message as warning 59 | if error_number is not -128 then log error_message 60 | end try 61 | end tell 62 | end run 63 | 64 | on formatDate(baseDate) 65 | set [_day, _month, _year] to [day, month, year] of baseDate 66 | # Change "May" to "5" -> crazy 67 | set _month to _month * 1 68 | set _month to text -1 thru -2 of ("0" & _month) 69 | set _day to text -1 thru -2 of ("0" & _day) 70 | set the text item delimiters to "-" 71 | return {_year, _month, _day} as string 72 | end formatDate 73 | -------------------------------------------------------------------------------- /smartrename.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -l -w 2 | #region use 3 | 4 | use v5.14; 5 | 6 | use utf8; 7 | use strict; 8 | use autodie; 9 | use warnings; 10 | use warnings qw( FATAL utf8 ); 11 | use open qw( :encoding(UTF-8) :std ); 12 | use charnames qw( :full :short ); 13 | use feature 'unicode_strings'; 14 | 15 | use Data::Dumper; 16 | use Date::Manip; 17 | 18 | use File::Basename qw( basename ); 19 | use Carp qw( carp croak confess cluck ); 20 | use Encode; 21 | use Encode::Locale; 22 | use Unicode::Normalize qw( NFD NFC ); 23 | 24 | END { close STDOUT } 25 | 26 | use constant false => 0; 27 | use constant true => 1; 28 | 29 | #endregion use 30 | 31 | # Set to 0 for normal operation 32 | # Set to 1 to use test data and minimal debug output 33 | # Set to 2 to add basic debug output 34 | # Set to 3 to add extreme debug output 35 | use constant debug => 0; 36 | 37 | # Conversion doesn't work this way 38 | # @ARGV = map { decode( locale => $_ ) } @ARGV; 39 | 40 | # Convert ARGV into unicode from UTF-8 41 | # For all the unicode pains, see https://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/6163129%236163129 42 | if ( grep /\P{ASCII}/ => @ARGV ) { 43 | @ARGV = map { decode( "UTF-8", $_ ) } @ARGV; 44 | } 45 | print @ARGV if debug > 2; 46 | 47 | # Get args from the command line 48 | my ( $fileName, $documentDate, $appendString ) = @ARGV; 49 | print Dumper @ARGV if debug > 2; 50 | print "ARGV Filename: $fileName" if debug > 2; 51 | 52 | #region setup 53 | my @strings; 54 | $strings[0] = $fileName; 55 | 56 | # Check if all necessary arguments are present, othwerwise exit 57 | # or continue with test data (if debug > 0) 58 | my $has_args = checkARGS(); 59 | exit if !$has_args && !debug; 60 | setTestData() if !$has_args; 61 | 62 | # Get today's date to prepend if no other date is available 63 | # And no, it doesn't work in only one line 64 | my $today = `date +"%Y-%m-%d"`; 65 | chomp($today); 66 | 67 | my $date_ISO = qr/(\d\d\d\d_\d\d_\d\d) | (\d\d\d\d-\d\d-\d\d)/x; 68 | my $date_ISO_Strict = qr/(\d\d\d\d-\d\d-\d\d)/x; 69 | my $date_US = qr/(\d\d\/\d\d\/\d\d\d\d)/x; 70 | my $date_DE = qr/(\d\d\.\d\d\.(?:\d\d){1,2})(?:[^\d]|$)/x; 71 | my $dateTime_ISO = qr/(\d\d\d\d_\d\d_\d\d _ \d\d_\d\d_\d\d)/x; 72 | my $notLeading = qr/(.*)(? 2; 91 | print "01 Date Candidate: $dateCandidate" if debug > 2; 92 | print "02 Date Override: $dateOveride" if debug > 2; 93 | 94 | foreach my $string (@strings) { 95 | 96 | print "03: $string" if debug; 97 | 98 | # Get rid of leading and trailing garbage 99 | # Todo: check if necessary 100 | $string = trim( convertUnicode($string) ); 101 | print "04: $string" if debug > 1; 102 | 103 | # exit; 104 | 105 | # Append AppendString only if not a date 106 | $string .= ' - ' . $appendString 107 | unless $dateCandidate or !$appendString; 108 | print "05: $string" if debug > 1; 109 | 110 | # Get all raw dates from string 111 | @dates = $string =~ / $dateTime_ISO | $date_ISO | $date_US | $date_DE /gx; 112 | 113 | # Filter list to remove all undefined 114 | @dates = grep( defined, @dates ); 115 | 116 | # Preserve all dates for later replacement mapping 117 | my @originalDates = @dates; 118 | print Dumper @originalDates if debug > 2; 119 | 120 | # Run regex (dateTime_ISO -> date_ISO) against all elements of the array 121 | @dates = map { 122 | ( my $s = $_ ) =~ 123 | s/(\d\d\d\d)_(\d\d)_(\d\d) _ \d\d_\d\d_\d\d/$1-$2-$3/gx; 124 | $s 125 | } @dates; 126 | 127 | # Run regex ('_' -> '-') against all elements of the array 128 | @dates = map { ( my $s = $_ ) =~ s/_/-/g; $s } @dates; 129 | 130 | # Replace all German dates DD.MM.YY(YY) with YYYY-MM-DD 131 | @dates = map { 132 | ( my $s = $_ ) =~ s/ (\d\d) \. (\d\d) \. ((?:\d\d){1,2}) /$3-$2-$1/gx; 133 | $s 134 | } @dates; 135 | 136 | # Parse the dates into YYYY-MM-DD or leave it untouched 137 | @dates = map { UnixDate( ParseDate($_), '%Y-%m-%d' ) || $_ } @dates; 138 | print Dumper @dates if debug > 2; 139 | 140 | # Eleminating duplicates, using a hash 141 | my %seen; 142 | @dates = map { !$seen{$_}++ ? $_ : '' } @dates; 143 | print Dumper @dates if debug > 2; 144 | 145 | # Concat all dates into one search patten and capture the parts inbetween 146 | # Use quotemeta to escape all non-literal characters to function in regex 147 | my $find = join( '(.*)', map { quotemeta($_) } @originalDates ) . '(.*)'; 148 | 149 | # Concat all replacements with each a subsequent backreference 150 | # But make sure that if @dates is empty we replace string with itself ($1) 151 | my $i; 152 | my $replace = 153 | $#dates == -1 ? '${1}' : join( '', map { $i++; "$_\${$i}" } @dates ); 154 | 155 | # Put it in quotes so that s/x/y/ee can evaluate 156 | $replace = '"' . $replace . '"'; 157 | 158 | print "Find: $find" if debug > 2; 159 | print "Replace: $replace" if debug > 2; 160 | 161 | # Double evaluate the replacement string 162 | # See: https://stackoverflow.com/questions/392643/how-to-use-a-variable-in-the-replacement-side-of-the-perl-substitution-operator 163 | $string =~ s/$find/$replace/ee; 164 | print "06: $string" if debug > 1; 165 | 166 | # If date override, replace first date in string with document date 167 | $string =~ s/ $date_ISO_Strict /$documentDate/x if $dateOveride; 168 | 169 | # Make first date found in string leading date 170 | $string =~ s/ (.*?) $date_ISO_Strict/$2 $1 /x; 171 | print "07: $string" if debug > 1; 172 | 173 | # If no date at all yet, add prepending date, 174 | # either document date or today's date 175 | $string = ( $documentDate || $today ) . $string 176 | if !grep { $_ =~ /\d\d\d\d-\d\d-\d\d/ } $string; 177 | print "08: $string" if debug > 1; 178 | 179 | # Clean 180 | $string = clean($string); 181 | 182 | print debug ? "09: $string" : $string; 183 | print "----------\n" if debug; 184 | } 185 | 186 | # Check if all commandline arguments are present 187 | sub checkARGS { 188 | 189 | my $num_args = $#ARGV + 1; 190 | print "Args: $num_args\n------" if debug; 191 | 192 | # No error if no command line args in debug mode 193 | return false if ( $num_args == 0 && debug ); 194 | 195 | if ( $num_args < 1 ) { 196 | print 197 | "\nUsage: namewrangler.pl Filename [Document Date] [Append String]\n"; 198 | print "Filename: String\nDocument Date: String (YYYY-MM-DD)"; 199 | print "Append String: String\n"; 200 | print "Note: If Append String is present,"; 201 | print "Document Date needs to be present, too.\n"; 202 | return false; 203 | } 204 | return true; 205 | } 206 | 207 | # Remove unwanted characters 208 | sub clean { 209 | my ($input) = @_; 210 | 211 | # Remove line breaks 212 | $input =~ s/[\r\n]/ /gm; 213 | 214 | # Replace "_" in numbers with "-" 215 | $input =~ s/(\d)_(\d)/$1-$2/gm; 216 | 217 | # Replace "," in numbers with "." 218 | $input =~ s/(\d),(\d)/$1.$2/gm; 219 | 220 | # Remove all unwanted characters 221 | $input =~ s/[^-A-Za-z0-9À-ȕ. +_@()'€$%&"—-]//gm; 222 | $input =~ s/([. +_@()'"—-]){1}(\1)+/$1/gm; 223 | 224 | # Replace "_" with " " 225 | $input =~ s/[_]/ /gm; 226 | 227 | # Remove anything but " " after leading date 228 | # $input =~ s/ ^$date_ISO_Strict [^A-Za-z0-9'"]* /$1 /gmx; 229 | $input =~ s/ ^$date_ISO_Strict [^A-Za-zÀ-ȕ0-9'"]* /$1 /gmx; 230 | 231 | # Replace "word -word" and "word- word" with "word - word" 232 | $input =~ s/ -(\w)/ - $1/gm; 233 | $input =~ s/(\w)- /$1 - /gm; 234 | $input =~ s/ *([-–—]) [-–—] */ $1 /gm; 235 | 236 | # Replace multiple blanks with a single one 237 | $input =~ s/ +/ /gm; 238 | 239 | # Custom cleaners for my personal taste 240 | $input =~ s/invoice/Invoice/gm; 241 | $input =~ s/Invoice[^ ](?!$)/Invoice /gm; 242 | 243 | # Remove leading and trailing blanks and characters 244 | trim($input); 245 | 246 | # Titlecase (experimental) 247 | $input =~ s/([^.]\b[[:lower:]])(\w)/\U$1\L$2/gx; 248 | 249 | return $input; 250 | } 251 | 252 | # Remove unwanted leading or trailing character (not just blanks) 253 | sub trim { 254 | my ($input) = @_; 255 | 256 | # Leading 257 | $input =~ s/^[^(A-Za-zÀ-ȕ0-9"'@\$%&§€]*//gmx; 258 | print "Trim 1: $input" if debug > 2; 259 | 260 | # Trailing 261 | $input =~ s/[^A-Za-zÀ-ȕ0-9?"')@\$%&§€]*$//gmx; 262 | print "Trim 2: $input" if debug > 2; 263 | 264 | return $input; 265 | } 266 | 267 | # Convert quotes and other unicode characters to ascii 268 | sub convertUnicode { 269 | my ($input) = @_; 270 | 271 | # Remove or replace specific characters 272 | $input =~ s/:/ \N{EM DASH}/gm; 273 | 274 | # Remove or replace specific characters 275 | # $input =~ s///gm; 276 | 277 | # Single Quotes 278 | $input =~ s/[\x{2018}\x{201A}\x{201B}\x{FF07}\x{2019}\x{60}]/\x27/g; 279 | 280 | # Double Quotes 281 | $input =~ s/[\x{FF02}\x{201C}\x{201D}\x{201E}\x{275D}\x{275E}]/"/g; 282 | 283 | return $input; 284 | } 285 | 286 | # Check if a string is a US or DE date. Return as ISO YYYY-MM-DD 287 | sub detectDate { 288 | my ($dateCandidate) = @_; 289 | 290 | print "A Date Candidate: $dateCandidate" if debug > 2; 291 | 292 | # print $dateCandidate; 293 | 294 | # my $dateCandidate = '01. Juni 2020'; 295 | # my $dateCandidate = '12.04.2020'; 296 | # my $dateCandidate = '2020-04-30'; 297 | # my $dateCandidate = '04/10/2020'; 298 | # my $dateCandidate = '22. Mai 2020'; 299 | 300 | my ( $dateDE, $dateISO, $dateUS, $resultISO ); 301 | 302 | # Check if only numbers; if only numbers, does it match YYYYMMDD? 303 | my $justNumbers = $dateCandidate =~ /^\d+$/; 304 | my $looksLikeDateISO = 305 | $dateCandidate =~ /\b(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12]\d|30|31)\b/; 306 | return '' if ( $justNumbers && !$looksLikeDateISO ); 307 | 308 | # Is it a straight forward German date (30.01.2020)? 309 | if ( $dateCandidate =~ /^$date_DE$/x ) { 310 | Date_Init( "Language=German", "DateFormat=non-US" ); 311 | 312 | $dateDE = ParseDate($dateCandidate); 313 | print "B DE Date: $dateDE" if debug > 2; 314 | } 315 | 316 | # Is it a straight forward US date (01/30/2020)? 317 | elsif ( $dateCandidate =~ /^$date_US$/x ) { 318 | Date_Init( "Language=English", "DateFormat=US" ); 319 | 320 | $dateUS = ParseDate($dateCandidate); 321 | print "C US Date: $dateUS" if debug > 2; 322 | } 323 | else { 324 | # Check all possibilities 325 | 326 | # Remove . from date string (Data::Manip can't deal with it) 327 | $dateCandidate =~ s/\./ /gmx; 328 | 329 | Date_Init( "Language=English", "DateFormat=US" ); 330 | $dateUS = ParseDate($dateCandidate); 331 | 332 | print "D US Date: $dateUS" if debug > 2; 333 | 334 | Date_Init( "Language=German", "DateFormat=non-US" ); 335 | $dateDE = ParseDate($dateCandidate); 336 | 337 | print "E DE Date: $dateDE" if debug > 2; 338 | 339 | Date_Init( "Language=English", "DateFormat=US" ); 340 | } 341 | 342 | if ( !$dateUS && !$dateDE ) { 343 | 344 | print "Bad Date" if debug > 1; 345 | $resultISO = ''; 346 | } 347 | else { 348 | $resultISO = $dateDE ? $dateDE : $dateUS; 349 | $resultISO = UnixDate( $resultISO, '%Y-%m-%d' ); 350 | } 351 | 352 | return $resultISO; 353 | } 354 | 355 | # Initalize testData for development, overrides ARGV 356 | #region setTestData 357 | sub setTestData { 358 | 359 | $documentDate = $documentDate || '2020-06-03'; 360 | $appendString = $appendString || ' company 361 | OFFER 1234-56-78 Content 362 | STUFF'; 363 | 364 | # $appendString = '20201020'; 365 | # $appendString = "13. Mai 2020"; 366 | $appendString = 367 | "„Das Produkt war überaus zwingend“ - Bestellbestätigung"; 368 | 369 | @strings = ( 370 | 'Möbel Fotos - IMG 3780', 371 | '2020_08_10 Some filename with Ümlaut', 372 | '10/20/2020 Words after a US-Date', 373 | ' 30.04.2020 Words after a German date', 374 | 'Abc 30.04.2020 German Date and Some More Text', 375 | '-Some Leading Word #30.04.2021 Some text at the end-', 376 | '_+Here\'s nothing - no date', 377 | '#Some garbage Leading #30.04.2021 Some garbage at the end;', 378 | '#2019-12-31 Leading Word 30.04.2021 -#Some other text at the end', 379 | 'Leading Word 30.04.21 -#Some other text at the end', 380 | '9999-12-11-Hallo World_', 381 | '2020-05-10 Company — Document Order_Test_052019', 382 | '2016_09_04_20_53_43', 383 | '2017_10_12_16_13_36 Lorem Ipsum Scan Name', 384 | '2019-11-08 - 2019_11_08_16_23_16 And even 2019-11-08 more dates', 385 | '2016-10-24 12_23_12345678_Bestätigung der Annahme GeSt (GeSt 1 A) 2015_ServiceOnline', 386 | '2020-05-26 Invoice - Company HG3H293D - 1234567890 -9.99 EUR.pdf', 387 | 'ScreenShot 2020-05-26 at 11.06.20@2x', 388 | '„Das Urteil: war — zwingend“', 389 | "2020-05-13 'Das Urteil war zwingend' - Frankfurter — Allgemeine Zeitung - TEST", 390 | 'Der Betrag ist 31,99 EUR' 391 | ); 392 | 393 | } 394 | 395 | #endregion setTestData 396 | --------------------------------------------------------------------------------