├── .gitignore ├── Changes ├── MANIFEST ├── MANIFEST.SKIP ├── Makefile.PL ├── README.md └── script ├── msgconvert.pl └── oledump.pl /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | /Makefile 3 | /Makefile.old 4 | /blib 5 | pm_to_blib 6 | /MANIFEST.bak 7 | /laboratory 8 | /MYMETA.json 9 | /MYMETA.yml 10 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | 0.904 2014-07-16 2 | 3 | - Use Email::Sender instead of Email::LocalDelivery. 4 | - Avoid line ending issues on Windows (thanks, Guillaume Cramoisan) 5 | 6 | 0.903 2009-10-26 7 | 8 | The 'yes, it is exacly two years later' release. 9 | - Mark output stream as UTF8 to silence 'wide character' warning. 10 | 11 | 0.902 2007-10-26 12 | 13 | - Right, that's the Email::Outlook::Message module. 14 | 15 | 0.901 2007-09-30 16 | 17 | - First version of msgconvert.pl based on seperate Email::Outlook::MSG 18 | module 19 | - oledump.pl now user-friendlier. 20 | - msgconvert.pl now either does delivery to one mbox file, or to 21 | individual .mime files for each input .msg file. 22 | 23 | 0.20070309 2007-03-09 24 | 25 | - Use Email::* instead of MIME::Tools for message building and header 26 | processing. 27 | - Don't crash on multipart/signed email. 28 | - Move main functionality (converting .MSG file into rfc882) into 29 | separate file. 30 | - Add auxilliary files to allow this to be an installable module 31 | (Makefile.PL, etc). 32 | 33 | 0.20060719 2006-07-19 34 | 35 | - Keep MIME::Tools sedated. 36 | 37 | 0.20060225 2006-02-25 38 | 39 | - Simplify code. 40 | 41 | 0.20060219 2006-02-19 42 | 43 | - Move OLE parsing to main program. 44 | - Parse nested MSG files (Bug reported by Christof Lukas). 45 | 46 | 0.20060218 2006-02-18 47 | 48 | - More sensible encoding warnings. 49 | 50 | 0.20040825 2004-08-25 51 | 52 | - Replace 'our' to declare globals with 'use vars'. This means the 53 | globals our now properly scoped inside the package and not the file. 54 | This also fixes the bug that this program did not work on perl versions 55 | below 5.6. (Bug reported by Tim Gustafson) 56 | 57 | 0.20040530 2004-05-30 58 | 59 | - Extract date from property 0047 (thanks, Marc Goodman). 60 | - Use address data to make To: and Cc: lines complete 61 | - Use the in-reply-to property 62 | - More unknown properties named. 63 | - Found another property containing an SMTP address. 64 | - Put non-SMTP type addresses back in output. 65 | 66 | 0.20040529 2004-05-29 67 | 68 | - Correctly format OLEDATE. 69 | 70 | 0.20040514 2004-05-14 71 | 72 | - Check if $self->{HEAD} actually exists before trying to add its 73 | contents to the output Mime object's header data. 74 | (Bug reported by Thomas Ng). 75 | - Don't produce multipart messages if not needed. 76 | (Bug reported by Justin B. Scout). 77 | 78 | 0.20040307 2004-03-07 79 | 80 | - Complete rewrite: All functional parts are now in the package 81 | MSGParser; 82 | - Creation of MIME::Entity object is delayed until the output routines, 83 | which means all data is known; This means I can create a 84 | multipart/alternative body. 85 | - Item names are parsed (thanks to bfrederi@alumni.sfu.ca for the 86 | information). 87 | 88 | 0.20040214 2004-02-14 89 | 90 | - Fix typos and incorrect comments. 91 | 92 | 0.20040104 2004-01-04 93 | 94 | - Handle address data slightly better 95 | - make From line less fake 96 | - make $verbose and $skippable_entries global vars 97 | - handle HTML variant of body text if present (though not optimally). 98 | 99 | 0.20020831 2002-08-31 100 | 101 | - long file name will definitely be used if present. 102 | - Full headers and mime type information are used when present. 103 | - Created generic system for specifying known items to be skipped. 104 | - Unexpected contents is never reason to bail out anymore. 105 | - Added support for usage message and option processing (--verbose). 106 | 107 | 0.20020715 2002-07-15 108 | 109 | - Recognize new items 'Cc', mime type of attachment, long filename of 110 | attachment, and full headers. 111 | - Attachments turn out to be numbered, so a regexp is now used to 112 | recognize label of items that are attachments. 113 | - Oldest recorded version :-). 114 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | Changes 2 | Makefile.PL 3 | MANIFEST This list of files 4 | tools/msgconvert.pl 5 | tools/oledump.pl 6 | -------------------------------------------------------------------------------- /MANIFEST.SKIP: -------------------------------------------------------------------------------- 1 | .git 2 | ^blib/ 3 | ^pm_to_blib$ 4 | ^Makefile.old$ 5 | ^Makefile$ 6 | ^laboratory/ 7 | ^MANIFEST.bak$ 8 | ^MANIFEST.SKIP$ 9 | ^.*\.tar\.gz$ 10 | \..*\.swp$ 11 | ^\.git/ 12 | ^\.gitignore$ 13 | -------------------------------------------------------------------------------- /Makefile.PL: -------------------------------------------------------------------------------- 1 | use 5.006; 2 | use ExtUtils::MakeMaker; 3 | WriteMakefile( 4 | 'NAME' => 'msgconvert', 5 | 'AUTHOR' => 'Matijs van Zuijlen', 6 | 'VERSION_FROM' => 'script/msgconvert.pl', 7 | 'EXE_FILES' => [ 'script/msgconvert.pl', 'script/oledump.pl' ], 8 | 'PREREQ_PM' => { 9 | Email::Outlook::Message => 0.901, 10 | OLE::Storage_Lite => 0.14, 11 | Email::Sender => 1.3, 12 | Getopt::Long => 0, 13 | Pod::Usage => 0, 14 | File::Basename => 0, 15 | }, 16 | ); 17 | 18 | 1; 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MSGConvert: A .MSG to mbox convertor 2 | 3 | **Note: `msgconvert` is now part of Email::Outlook::Message. Please visit the 4 | [Email::Outlook::Message repository on 5 | GitHub](https://github.com/mvz/email-outlook-message-perl) for the latest 6 | source code.** 7 | 8 | ## Usage 9 | 10 | To use it, run: 11 | 12 | perl -w msgconvert.pl YourMessage.msg 13 | 14 | This will produce a file YourMessage.mime containing the message in RFC822 15 | format. The program will complain about unrecognized OLE parts and other 16 | problems on stderr. If you supply the option `--verbose`, it will also tell you 17 | what OLE parts it knows about but doesn't use, and what I think they are. The 18 | option `--help` will make it print some usage information. 19 | 20 | You can also let MSGConvert deliver all .MSG files in one mbox file using 21 | `--mbox`, like so (assuming you made `msgconvert.pl` executable): 22 | 23 | msgconvert.pl --mbox some-mbox-file *.msg 24 | 25 | ## Installing 26 | 27 | The following instructions should work generally on any Unix-like system with a 28 | reasonably new Perl installed. 29 | 30 | * Download the script and put it in a convenient location. 31 | * Install the necessary Perl modules by executing the following: 32 | 33 | cpan -i Email::Sender Email::Outlook::Message 34 | 35 | You can run this as root if you would like to install these modules system-wide. 36 | 37 | On Debian and Ubuntu, try the following: 38 | 39 | * Download the script and put it in a convenient location. 40 | * Install Email::Outlook::Message and Email::Sender by executing the following: 41 | 42 | sudo apt-get install libemail-outlook-message-perl libemail-localdelivery-perl 43 | 44 | # Development 45 | 46 | For the latest source code, go to 47 | [MSGConvert on GitHub](https://github.com/mvz/msgconvert) and 48 | [Email::Outlook::Message on GitHub](https://github.com/mvz/email-outlook-message-perl). 49 | 50 | # Known Bugs/Issues 51 | 52 | Not all data that's in the .MSG file is converted. There simply are some parts 53 | whose meaning escapes me. However, most things are converted correctly by now, 54 | including plain text, HTML and RTF-formatted message bodies. 55 | 56 | Attachments with Apple-style resource forks, as well as PGP-signed email is 57 | known not to be converted properly. 58 | -------------------------------------------------------------------------------- /script/msgconvert.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # 3 | # msgconvert.pl: 4 | # 5 | # Convert .MSG files (made by Outlook (Express)) to multipart MIME messages. 6 | # 7 | 8 | use Email::Outlook::Message; 9 | use Email::Sender::Transport::Mbox; 10 | use Getopt::Long; 11 | use Pod::Usage; 12 | use File::Basename; 13 | use vars qw($VERSION); 14 | $VERSION = "0.904"; 15 | 16 | # Setup command line processing. 17 | my $verbose = ''; 18 | my $mboxfile = ''; 19 | my $help = ''; # Print help message and exit. 20 | GetOptions( 21 | 'mbox=s' => \$mboxfile, 22 | 'verbose' => \$verbose, 23 | 'help|?' => \$help) or pod2usage(2); 24 | pod2usage(1) if $help; 25 | 26 | # Check file names 27 | defined $ARGV[0] or pod2usage(2); 28 | 29 | my $using_mbox = $mboxfile ne ''; 30 | my $transport; 31 | 32 | if ($using_mbox) { 33 | $transport = Email::Sender::Transport::Mbox->new({ filename => $mboxfile }); 34 | } 35 | 36 | foreach my $file (@ARGV) { 37 | my $msg = new Email::Outlook::Message($file, $verbose); 38 | my $mail = $msg->to_email_mime; 39 | if ($using_mbox) { 40 | $transport->send($mail, { from => $mail->header('From') || '' }); 41 | } else { 42 | my $basename = basename($file, qr/\.msg/i); 43 | my $outfile = "$basename.eml"; 44 | open OUT, ">:utf8", $outfile 45 | or die "Can't open $outfile for writing: $!"; 46 | binmode(OUT, ":utf8"); 47 | print OUT $mail->as_string; 48 | close OUT; 49 | } 50 | } 51 | 52 | # 53 | # Usage info follows. 54 | # 55 | __END__ 56 | 57 | =head1 NAME 58 | 59 | msgconvert.pl - Convert Outlook .msg files to mbox format 60 | 61 | =head1 SYNOPSIS 62 | 63 | msgconvert.pl [options] ... 64 | 65 | Options: 66 | --mbox deliver messages to mbox file 67 | --verbose be verbose 68 | --help help message 69 | 70 | =head1 OPTIONS 71 | 72 | =over 8 73 | 74 | =item B<--mbox> 75 | 76 | Deliver to the given mbox file instead of creating individual .mime 77 | files. 78 | 79 | =item B<--verbose> 80 | 81 | Print information about skipped parts of the .msg file. 82 | 83 | =item B<--help> 84 | 85 | Print a brief help message. 86 | 87 | =head1 DESCRIPTION 88 | 89 | This program will convert the messages contained in the Microsoft Outlook 90 | files ... to message/rfc822 files with extension .mime. 91 | Alternatively, if the --mbox option is present, all messages will be put in 92 | the given mbox file. This program will complain about unrecognized OLE 93 | parts in the input files on stderr. 94 | 95 | =head1 BUGS 96 | 97 | The program will not check whether output files already exist. Also, if you 98 | feed it "foo.MSG" and "foo.msg", you'll end up with one "foo.mime", 99 | containing one of the messages. 100 | 101 | Not all data that's in the .MSG file is converted. There simply are some 102 | parts whose meaning escapes me. One of these must contain the date the 103 | message was sent, for example. Formatting of text messages will also be 104 | lost. YMMV. 105 | 106 | =head1 AUTHOR 107 | 108 | Matijs van Zuijlen, C 109 | 110 | =head1 COPYRIGHT AND LICENSE 111 | 112 | Copyright 2002, 2004, 2006, 2007 by Matijs van Zuijlen 113 | 114 | This program is free software; you can redistribute it and/or modify 115 | it under the same terms as Perl itself. 116 | 117 | =cut 118 | -------------------------------------------------------------------------------- /script/oledump.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # oledump.pl 3 | # 4 | # Based on: 5 | # 6 | # OLE::Storage_Lite Sample 7 | # Name : smplls.pl 8 | # by Kawai, Takanori (Hippo2000) 2000.11.4 9 | # Displays PPS structure of specified file 10 | # Just subset of lls that is distributed with OLE::Storage 11 | # 12 | # Changes made by Matijs van Zuijlen: 13 | # 2002.04.16: 14 | # Indenting 15 | # Added #! line. 16 | # Moved main of program to end. 17 | # English 18 | # Removed prototype info from PrnItem, awaiting knowledge on how to 19 | # resolve warnings about it. 20 | # Added code to print data as well. 21 | # 2002.04.17: 22 | # Allow more characters as printable. 23 | # 24 | #================================================================= 25 | 26 | use strict; 27 | use OLE::Storage_Lite; 28 | use Getopt::Long; 29 | use Pod::Usage; 30 | use locale; 31 | use vars qw($VERSION); 32 | $VERSION = "0.901"; 33 | 34 | #---------------------------------------------------------------- 35 | # PrnItem: Displays PPS infomations 36 | #---------------------------------------------------------------- 37 | sub PrnItem { 38 | my($oPps, $iLvl, $iTtl, $iDir, $prData) = @_; 39 | my $raDate; 40 | my %sPpsName = (1 => 'DIR', 2 => 'FILE', 5=>'ROOT'); 41 | 42 | # Make Name (including PPS-no and level) 43 | my $sName = OLE::Storage_Lite::Ucs2Asc($oPps->{Name}); 44 | $sName =~ s/\W/ /g; 45 | $sName = sprintf("%s %3d '%s' (pps %x)", 46 | ' ' x ($iLvl * 2), $iDir, $sName, $oPps->{No}); 47 | 48 | # Make Date 49 | my $sDate; 50 | if($oPps->{Type}==2) { 51 | $sDate = sprintf("%10x bytes", $oPps->{Size}); 52 | } 53 | else { 54 | $raDate = $oPps->{Time2nd}; 55 | $raDate = $oPps->{Time1st} unless($raDate); 56 | $sDate = ($raDate)? 57 | sprintf("%02d.%02d.%4d %02d:%02d:%02d", 58 | $raDate->[3], $raDate->[4]+1, $raDate->[5]+1900, 59 | $raDate->[2], $raDate->[1], $raDate->[0]) : ""; 60 | } 61 | 62 | # Display 63 | printf "%02d %-50s %-4s %s\n", 64 | ${$iTtl}++, 65 | $sName, 66 | $sPpsName{$oPps->{Type}}, 67 | $sDate; 68 | 69 | # MvZ: Print Data 70 | if ($prData and $oPps->{Type}==2 and $oPps->{Size} > 0) { 71 | my $data = $oPps->{Data}; 72 | my $length = length($data); 73 | my $numloops = $length/16; 74 | my $i; 75 | 76 | for ($i=0; $i<$numloops; $i++) { 77 | #print "$i; $numloops;\n"; 78 | my $substring = substr($data, $i*16, 16); 79 | my $copy = $substring; 80 | $substring =~ s/./sprintf("%02x ", ord($&))/sge; 81 | $copy =~ s/[^[:print:]]/./sg; 82 | #$copy =~ s/[\x00-\x1f\x7f-\xa0]/./sg; 83 | #$copy =~ s/[\x09\x0a\x0c\x0d]/./sg; 84 | 85 | print " " x 12; 86 | print sprintf("%-48s %-16s\n", $substring, $copy); 87 | } 88 | } 89 | 90 | # For its Children 91 | my $iDirN=1; 92 | foreach my $iItem (@{$oPps->{Child}}) { 93 | PrnItem($iItem, $iLvl+1, $iTtl, $iDirN, $prData); 94 | $iDirN++; 95 | } 96 | } 97 | 98 | # Main 99 | # 100 | my $prData; 101 | my $help = ''; # Print help message and exit. 102 | my $opt = GetOptions("with-data" => \$prData) or pod2usage(2); 103 | pod2usage(1) if $help; 104 | pod2usage(2) if($#ARGV < 0); 105 | foreach my $file (@ARGV) { 106 | my $oOl = OLE::Storage_Lite->new($file); 107 | my $oPps = $oOl->getPpsTree(1); 108 | die( $file. " must be a OLE file") unless($oPps); 109 | my $iTtl = 0; 110 | PrnItem($oPps, 0, \$iTtl, 1, $prData); 111 | } 112 | # 113 | # Usage info follows. 114 | # 115 | __END__ 116 | 117 | =head1 NAME 118 | 119 | oledump.pl - Dump structure of an OLE file. 120 | 121 | =head1 SYNOPSIS 122 | 123 | oledump.pl [options] ... 124 | 125 | Options: 126 | --with-data dump data too 127 | --help help message 128 | 129 | =head1 OPTIONS 130 | 131 | =over 8 132 | 133 | =item B<--with-data> 134 | 135 | Dump data as will, showing both hex and any printable characters. 136 | 137 | =item B<--help> 138 | 139 | Print a brief help message. 140 | 141 | =head1 DESCRIPTION 142 | 143 | This program will dump the PPS structure of OLE files passed to it on the 144 | command line. It is based on smplls.pl by Kawai, Takanori, which is part of 145 | the OLE::Storage_Lite distribution. 146 | 147 | =cut 148 | --------------------------------------------------------------------------------