├── LICENSE ├── theme.css ├── thunderbird-notmuch-import.py ├── testparser.py ├── README.rst ├── summary.py ├── emailparser.py ├── summary.html └── mb2md-3.20.pl /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016, Johannes Buchner 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | 11 | -------------------------------------------------------------------------------- /theme.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Georgia; 3 | font-family: Verdana, sans-serif; 4 | background-color: white; 5 | } 6 | 7 | h5 { 8 | margin:0; 9 | padding:0; 10 | font-size: xx-small; 11 | text-transform: capitalize; 12 | } 13 | 14 | table { 15 | width: 100%; 16 | background-color: #F6F7F8; 17 | margin: 0; 18 | padding: 0; 19 | border: none; 20 | border-collapse: collapse; 21 | border-top: 10px solid #DDE4E9; 22 | border-bottom: 10px solid #DDE4E9; 23 | } 24 | td { 25 | font-size: x-large; 26 | padding-left: 10px; 27 | padding-right: 10px; 28 | } 29 | td.left { 30 | width: 45%; 31 | } 32 | td.right { 33 | width: 45%; 34 | text-align: right; 35 | } 36 | td.middle { 37 | width: 10%; 38 | text-align: center; 39 | font-size: 400%; 40 | } 41 | td.details { 42 | font-size: medium; 43 | } 44 | td.email { 45 | font-size: x-small; 46 | } 47 | td.email, td.details, td.left, td.right, td.middle { 48 | border: none; 49 | } 50 | td.gap { 51 | height: 1em; 52 | border-top: 10px solid #DDE4E9; 53 | border-bottom: 10px solid #DDE4E9; 54 | background-color: white; 55 | } 56 | td.gaplater { 57 | border: none; 58 | border-top: 10px solid #DDE4E9; 59 | border-bottom: 10px solid #DDE4E9; 60 | padding-top: 3em; 61 | padding-bottom: 2em; 62 | background-color: white; 63 | color: darkgrey; 64 | } 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /thunderbird-notmuch-import.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | import glob 4 | import subprocess 5 | 6 | thunderbirdpath = os.path.join(os.environ['HOME'], '.thunderbird') 7 | cf = configparser.ConfigParser() 8 | cf.read(os.path.join(thunderbirdpath, 'profiles.ini')) 9 | 10 | paths = [] 11 | 12 | for s in cf.sections(): 13 | if cf.has_option(s, 'Default') and cf.getint(s, 'Default') == 1: 14 | path = cf.get(s, 'Path') 15 | if cf.getint(s, 'IsRelative') == 1: 16 | path = os.path.join(thunderbirdpath, path) 17 | paths += glob.glob(os.path.join(path, 'ImapMail/*/INBOX*')) 18 | paths += glob.glob(os.path.join(path, 'Mail/*/Inbox')) 19 | 20 | print '# getting notmuch path ...' 21 | o = None 22 | try: 23 | p = subprocess.Popen(['notmuch', 'config', 'get', 'database.path'], stdout=subprocess.PIPE) 24 | if p.wait() == 0: 25 | o = p.stdout.read().strip() 26 | except OSError: 27 | print 'ERROR could not check notmuch config' 28 | print ' make sure notmuch is installed and configured with "notmuch config"' 29 | 30 | print '# will export maildir into %s' % o 31 | print '# execute the following commands:' 32 | 33 | for p in paths: 34 | if p.endswith('.msf') or 'trash' in p.lower(): 35 | continue 36 | print "perl mb2md-3.20.pl -s '%s' -d '%s'" % (p, o) 37 | 38 | print 39 | print 'notmuch new' 40 | 41 | 42 | -------------------------------------------------------------------------------- /testparser.py: -------------------------------------------------------------------------------- 1 | import notmuch 2 | import os 3 | import emailparser 4 | import sys 5 | import logging 6 | 7 | dbpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test-private') 8 | dbpath = sys.argv[1] 9 | db = notmuch.Database(dbpath) 10 | query = db.create_query(sys.argv[2]) 11 | 12 | if __name__ == '__main__': 13 | emailparser.default_logging() 14 | logging.getLogger('emailparser').setLevel(logging.DEBUG) 15 | for m in query.search_messages(): 16 | r = emailparser.parse_email_message(m) 17 | f = os.path.basename(m.get_filename()) 18 | txt = "" 19 | for info in r: 20 | txt += """ 21 | Flight %(departure)s --> %(arrival)s 22 | 23 | Departing %(departureTimestr)s %(boardingTime)s 24 | from %(departure)s %(departureTerminal)s %(departureGate)s 25 | arriving %(arrivalTimestr)s 26 | To %(arrival)s %(arrivalTerminal)s %(arrivalGate)s 27 | Flight number %(flightNumber)s with %(airline)s%(operator)s 28 | %(ticketNumber)s %(ticketText)s %(ticketDownload)s %(ticketPrint)s 29 | 30 | 31 | """ % info 32 | if txt != "": 33 | with open(os.path.join(dbpath, 'parsed', f), 'w') as fout: 34 | fout.write(txt.encode('utf8')) 35 | ftest = os.path.join(dbpath, 'expected', f) 36 | if os.path.exists(ftest): 37 | test = open(ftest).read() 38 | if txt == "": 39 | print "no parsing output for %s" % f 40 | print "expected:", ftest 41 | print test 42 | break 43 | elif txt != test: 44 | print "parsing difference for %s" % f 45 | print "expected:", ftest 46 | print test 47 | print "actual:", os.path.join(dbpath, 'parsed', f) 48 | print txt 49 | print 'to compare: meld %s %s' % (ftest, os.path.join(dbpath, 'parsed', f)) 50 | break 51 | else: 52 | print "result for %s" % f 53 | print txt 54 | print 'ok' 55 | else: 56 | if txt != "": 57 | print "unexpected parsing output for %s" % f 58 | print "actual:", os.path.join(dbpath, 'parsed', f) 59 | print txt 60 | break 61 | else: 62 | print "result for %s" % f 63 | print txt 64 | print 'ok' 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Flight & Hotel reservation email parser 2 | ======================================== 3 | 4 | Searches emails for flight tickets and hotel reservations. 5 | Builds a brief summary view of all your reservations over time. 6 | 7 | 8 | -------- 9 | Usage 10 | -------- 11 | 12 | 13 | 1. Adding emails to the email database 14 | 15 | People store emails in various ways. 16 | Here we support the notmuch database (https://notmuchmail.org/) 17 | It is trivial to include maildir emails into notmuch with "notmuch new". 18 | 19 | For email programs with mailbox, mb2md (http://batleth.sapienti-sat.org/projects/mb2md/) can be run to convert to maildir, followed by "notmuch new" 20 | 21 | For Thunderbird, the thunderbird-notmuch-import.py script is provided, 22 | which finds the relevant folders automatically. 23 | 24 | 2. Building the report 25 | 26 | run with some email search keywords:: 27 | 28 | $ python summary.py 'schema.org/FlightReservation OR ticket OR flight OR flug OR viaje OR booking OR confirmation OR confirmacion' 29 | 30 | It will give you some idea of what it finds, for example:: 31 | 32 | 2015-11-28 Flight HOUSTON, TX --> WASHINGTON, DC 33 | 34 | Departing 2015-11-28 19:10 35 | from HOUSTON, TX 36 | arriving 2015-11-28 23:05 37 | To WASHINGTON, DC 38 | Flight number UA1955 39 | 40 | 3. View report 41 | 42 | For an example report see "summary.html"! 43 | 44 | 45 | 46 | Features implemented 47 | ---------------------- 48 | 49 | * Summary of all flights, with crucial information (when, from-to, ...) 50 | * Including PDF eticket files, extracted from emails. 51 | * Parallel parsing for speed-up. 52 | * Parsing of the flight reservations schema following https://developers.google.com/gmail/markup/reference/flight-reservation 53 | * Some heuristic parsing of html emails in English, Spanish and German. 54 | If you have emails that can serve as additional test cases, please submit 55 | them. Contributions to the parsing are welcome! 56 | 57 | To Do 58 | ------------ 59 | 60 | * More heuristic parsing rules. 61 | * Implement hotel bookings (https://developers.google.com/gmail/markup/reference/hotel-reservation). Booking.com and some others produce the json version. 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /summary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import notmuch 4 | import BeautifulSoup 5 | import datetime 6 | import dateutil.parser 7 | import emailparser 8 | import logging 9 | from tzlocal import get_localzone 10 | import sys 11 | import os 12 | 13 | logging.basicConfig(filename='emailparser.log',level=logging.DEBUG) 14 | logFormatter = logging.Formatter("[%(name)s %(levelname)s]: %(message)s") 15 | consoleHandler = logging.StreamHandler() 16 | consoleHandler.setFormatter(logFormatter) 17 | consoleHandler.setLevel(logging.WARN) 18 | logging.getLogger().addHandler(consoleHandler) 19 | 20 | if len(sys.argv) < 3: 21 | sys.stderr.write("""SYNOPSIS: %(exe)s 22 | 23 | database: absolute path to notmuch database 24 | query: query to use. Has to be in quotes. 25 | 26 | Example usage: 27 | %(exe)s 'schema.org/FlightReservation OR ticket OR flight OR flug OR viaje OR booking OR confirmation OR confirmacion' 28 | 29 | To speed up date parsing, you can specify the languages to consider with the 30 | LANGUAGES environment variable: 31 | LANGUAGES="en de es" 32 | 33 | Author: Johannes Buchner (c) 2017 34 | """ % dict(exe=sys.argv[0])) 35 | sys.exit(1) 36 | db = notmuch.Database() 37 | query = sys.argv[1] 38 | query = db.create_query(query) 39 | #'schema.org/FlightReservation OR ticket OR flight OR flug OR viaje OR booking OR confirmation OR confirmacion') 40 | languages = os.environ.get('LANGUAGES', None) 41 | if languages is not None: 42 | languages = languages.split() 43 | #query = db.create_query('schema.org/FlightReservation OR eticket OR flight') 44 | #languages = ['en'] 45 | #query = db.create_query('schema.org/FlightReservation') 46 | 47 | all_reservations = emailparser.parse_multiple_email_messages(query.search_messages(), languages=languages) 48 | #all_reservations = [] 49 | #messages = list(query.search_messages()) 50 | #for i, m in enumerate(messages[::-1]): 51 | # print('handling %d/%d: "%s" from %s' % (i, len(messages), m.get_header('Subject'), 52 | # datetime.datetime.fromtimestamp(m.get_date()).strftime('%Y-%m-%d'))) 53 | # reservations = emailparser.parse_email_message(m, languages = languages) 54 | # print('got %d reservations' % len(all_reservations)) 55 | # all_reservations += reservations 56 | print('got %d reservations' % len(all_reservations)) 57 | 58 | def prepend(info, k, prefix): 59 | if info[k] and info[k] != '': 60 | info[k] = prefix + info[k] 61 | 62 | def dateConverter(day): 63 | #day = dateutil.parser.parse(dateText) 64 | if day.tzinfo is not None: 65 | return day 66 | print 'Warning: Using local time zone to order %s' % day 67 | local_tz = get_localzone() 68 | return day.replace(tzinfo=local_tz) 69 | 70 | # sort by departure time 71 | all_reservations.sort(key=lambda info: dateConverter(info['departureTime'])) 72 | 73 | previous = None 74 | fout = open('summary.html', 'w') 75 | fout.write(""" 76 | 77 | 78 | Flight summary 79 | 80 | 81 | 82 |

Flights

83 | 84 | """) 85 | 86 | file_id = 1 87 | for info in all_reservations: 88 | prepend(info, 'departureGate', 'Gate ') 89 | prepend(info, 'arrivalGate', 'Gate ') 90 | prepend(info, 'arrivalTerminal', 'Terminal ') 91 | prepend(info, 'departureTerminal', 'Terminal ') 92 | prepend(info, 'ticketNumber', 'Ticket#') 93 | prepend(info, 'operator', ' operated by ') 94 | flightday = info['departureTime'].date() 95 | prepend(info, 'boardingTimestr', 'Boarding ') 96 | 97 | filenames = [] 98 | msg_id = info['emailId'] 99 | for m in db.create_query('id:%s' % msg_id).search_messages(): 100 | for mp in m.get_message_parts(): 101 | if mp.get_content_type() == 'application/pdf' or (mp.get_content_type() == 'application/octet-stream' and mp.get_filename().lower().endswith('.pdf')): 102 | filename = 'file_id%d.pdf' % file_id 103 | with open(filename, 'w') as f: 104 | f.write(mp.get_payload(decode=True)) 105 | filenames.append((mp.get_filename(), filename)) 106 | file_id += 1 107 | info['pdffiles'] = ' | '.join(['%s' % (filename, origfilename) for (origfilename, filename) in filenames]) 108 | 109 | if previous is not None and (flightday - previous).days > 14: 110 | delta = (flightday - previous).days 111 | print '=============', delta, 'days later' 112 | fout.write(""" 113 | 114 | 116 | """ % delta) 117 | else: 118 | fout.write(""" 119 | 120 | 122 | """) 123 | previous = flightday 124 | info['departureDay'] = flightday.strftime('%Y-%m-%d') 125 | info['departureJustTime'] = info['departureTime'].strftime('%H:%M') 126 | info['emailday'] = info['emailTime'].date().strftime('%Y-%m-%d') 127 | 128 | print """ 129 | %(departureDay)s Flight %(departure)s --> %(arrival)s 130 | 131 | Departing %(departureTimestr)s %(boardingTime)s 132 | from %(departure)s %(departureTerminal)s %(departureGate)s 133 | arriving %(arrivalTimestr)s 134 | To %(arrival)s %(arrivalTerminal)s %(arrivalGate)s 135 | Flight number %(flightNumber)s with %(airline)s%(operator)s 136 | %(ticketNumber)s %(ticketText)s %(ticketDownload)s %(ticketPrint)s 137 | 138 | Email %(emailday)s "%(emailSubject)s" 139 | 140 | 141 | """ % info 142 | fout.write((""" 143 | 155 | 156 | 157 | 164 | 165 | 166 | 176 | 177 | 178 | 182 | """ % info).encode('utf-8')) 183 | 184 | -------------------------------------------------------------------------------- /emailparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import notmuch 4 | import BeautifulSoup 5 | import datetime 6 | import dateutil.parser 7 | import dateparser 8 | 9 | import logging 10 | from tzlocal import get_localzone 11 | 12 | # this makes everything utf8 13 | import sys 14 | reload(sys) 15 | sys.setdefaultencoding('utf-8') 16 | 17 | # this disables relative dates 18 | import dateparser.date 19 | dateparser.date._DateLanguageParser._try_freshness_parser = lambda self: False 20 | 21 | def get_value(reservation, element, itemprop, default): 22 | node = reservation.find(element, itemprop=itemprop) 23 | if node is None: 24 | return default 25 | else: 26 | attrs = dict(node.attrs) 27 | if element == 'link': 28 | return attrs.get('href', default) 29 | elif element == 'meta': 30 | return attrs.get('content', default) 31 | def get_name(parent, itemprop, itemtype, default, childitemprop='name'): 32 | node = parent.find('div', itemprop=itemprop, itemtype=itemtype) 33 | if node is None: 34 | return default 35 | return get_value(node, 'meta', childitemprop, default) 36 | 37 | def get_code(parent, itemprop, itemtype, default): 38 | return get_name(parent, itemprop, itemtype, default, childitemprop='iataCode') 39 | 40 | def prepend(info, k, prefix): 41 | if info[k] and info[k] != '': 42 | info[k] = prefix + info[k] 43 | 44 | from six.moves.html_parser import HTMLParser 45 | h = HTMLParser() 46 | 47 | def nicefy_htmltext(txt): 48 | el = h.unescape(txt.strip()) 49 | el = el.strip('.').replace('\n', ' ').replace('\t', ' ').replace(' ', ' ').replace(' ', ' ').replace(' ', ' ').strip() 50 | return el 51 | 52 | def parse_field(v): 53 | # split td field into components 54 | vs = [] 55 | #for el in list(v) + list(v.findChildren()): 56 | for el in list(v.recursiveChildGenerator()) + list(v): 57 | if hasattr(el, 'text'): 58 | el = el.text 59 | el = nicefy_htmltext(el) 60 | if '<' in el or '>' in el: 61 | continue 62 | if len(el) != 0 and len(el) < 200: 63 | vs.append(el) 64 | return vs 65 | 66 | def shorten_airport(name): 67 | name = nicefy_htmltext(name).strip('#') 68 | if len(name) < 8: 69 | return name 70 | if '(' not in name: 71 | return name 72 | part = name.split('(')[1] 73 | if ')' not in part: 74 | return name 75 | part = part.split(')')[0] 76 | if len(part) != 3: 77 | return name 78 | return part 79 | 80 | def parsedate_simple(s): 81 | logf = logging.getLogger('emailparser.parsedate') 82 | logf.info('simple date parsing "%s"...' % s) 83 | if s == '': 84 | return '' 85 | return dateutil.parser.parse(s) 86 | def parsedate(s, default, languages=None): 87 | logf = logging.getLogger('emailparser.parsedate') 88 | logf.info('date parsing "%s"...' % s) 89 | try: 90 | return dateutil.parser.parse(s, default=default.replace(second=0, minute=0, hour=0)) 91 | except ValueError as e: 92 | r = dateparser.parse(s, languages=languages, settings= 93 | dateparser.conf.Settings().replace( 94 | RELATIVE_BASE=default.replace(second=0, minute=0, hour=0))) 95 | if r is None: 96 | raise e 97 | else: 98 | return r 99 | 100 | previous_dates = {} 101 | 102 | def parsedate_cached(s, default, languages=None): 103 | logf = logging.getLogger('emailparser.parsedate_cached') 104 | s = s.replace('\n', ' ').replace('\t', ' ').replace(' ', ' ').replace(' ', ' ').replace(' ', ' ') 105 | if len(s) > 50: 106 | raise ValueError('too long for a date') 107 | if len(s) < 4: 108 | raise ValueError('too short for a date') 109 | if not any([n in s for n in '0123456789']): 110 | raise ValueError('numbers expected in a date') 111 | k = (s, default) 112 | if k not in previous_dates: 113 | try: 114 | d = parsedate(s, default=default, languages=languages) 115 | if d.year < 1980 or d.year > datetime.datetime.now().year + 2: 116 | logf.warn('problem parsing "%s": outside reasonable date range' % (s)) 117 | d = None 118 | previous_dates[k] = d 119 | except OverflowError as e: 120 | logf.warn('problem parsing "%s": %s' % (s, e)) 121 | previous_dates[k] = None 122 | except Exception as e: 123 | logf.warn('problem parsing "%s": %s' % (s, e)) 124 | previous_dates[k] = None 125 | if previous_dates[k] is None: 126 | raise ValueError(s) 127 | else: 128 | return previous_dates[k] 129 | 130 | def parse_flight(columns, values, global_info, languages=None): 131 | logf = logging.getLogger('emailparser.parse_flight') 132 | info = {} 133 | defaultdate = global_info['emailTime'] 134 | logf.info('defaultdate(email) <- %s' % defaultdate) 135 | for c, v in zip(columns, values): 136 | logf.debug('parsing row: column %s: "%s"' % (c, v)) 137 | if c.lower() in ['departs', 'departure', 'departure city and time', 'from', 'salida']: 138 | logf.debug('parsing departure: "%s"' % v) 139 | for vi in parse_field(v): 140 | logf.debug('parsing departure component: "%s"' % vi) 141 | # try to parse as time 142 | try: 143 | time = parsedate_cached(vi, default=defaultdate, languages=languages) 144 | if time.hour != 0 or time.minute != 0: 145 | logf.info('departureTime <- %s' % time) 146 | info['departureTime'] = time 147 | except ValueError as e: 148 | # could be a location 149 | v = shorten_airport(vi) 150 | if is_airport(v): 151 | logf.info('departure (location) <- %s (date: %s)' % (v, e)) 152 | info['departure'] = v 153 | elif c.lower() in ['arrives', 'arrival', 'arrival city and time', 'to', 'llegada']: 154 | logf.debug('parsing arrival: "%s"' % v) 155 | for vi in parse_field(v): 156 | logf.debug('parsing arrival component: "%s"' % vi) 157 | # try to parse as time 158 | try: 159 | time = parsedate_cached(vi, default=defaultdate, languages=languages) 160 | if time.hour != 0 or time.minute != 0: 161 | logf.info('arrivalTime <- %s' % time) 162 | info['arrivalTime'] = time 163 | except ValueError as e: 164 | # could be a location 165 | v = shorten_airport(vi) 166 | if is_airport(v): 167 | logf.info('arrival (location) <- %s (date: %s)' % (v, e)) 168 | info['arrival'] = v 169 | elif c.lower() in ['day, date']: 170 | day = nicefy_htmltext(v.text) 171 | if day != '': 172 | logf.debug('parsing day "%s"' % day) 173 | try: 174 | defaultdate = parsedate_cached(day, default=defaultdate, languages=languages) 175 | logf.info('defaultdate <- %s' % defaultdate) 176 | except ValueError as e: 177 | try: 178 | defaultdate = datetime.datetime.strptime(day, '%a, %d%b%y') 179 | logf.info('defaultdate <- %s' % defaultdate) 180 | except ValueError as e: 181 | logf.debug('failed to parse as day "%s" (%s)' % (day[:100], e)) 182 | pass 183 | elif c.lower() in ['flight', 'flights', 'vuelo \xe2\x84\x96']: 184 | flight = nicefy_htmltext(v.text.strip()) 185 | #if flight.startswith('Seat'): 186 | # logf.info('airplaneSeat <- "%s"' % flight) 187 | # info['airplaneSeat'] = flight 188 | #else: 189 | logf.info('flightNumber <- "%s"' % flight) 190 | info['flightNumber'] = flight 191 | for k in 'operado por', 'operated by': 192 | if k in flight.lower(): 193 | i = flight.lower().index(k) 194 | flightNumber = flight[:i].strip() 195 | operator = flight[i+len(k):].strip() 196 | logf.info('flightNumber <- "%s"' % flightNumber) 197 | info['flightNumber'] = flightNumber 198 | logf.info('operator <- "%s"' % operator) 199 | info['operator'] = operator 200 | elif c.lower() in ['airline']: 201 | airline = v.text.strip() 202 | try: 203 | airline, flightNumber = airline.split('#') 204 | logf.info('airline <- "%s"' % airline.strip()) 205 | info['airline'] = airline.strip() 206 | logf.info('flightNumber <- "%s"' % flightNumber.strip()) 207 | info['flightNumber'] = flightNumber.strip() 208 | except: 209 | logf.info('airline <- "%s"' % airline.strip()) 210 | info['airline'] = airline.strip() 211 | else: 212 | logf.debug('unhandled column "%s" with content: "%s"' % (c, v.text)) 213 | 214 | if len(info) > 0: 215 | logf.info('learned flight info: %s' % info) 216 | 217 | info.update(global_info) 218 | all_keys = ['reservationNumber', 'checkinUrl', 'ticketNumber', 'ticketDownload', 219 | 'ticketPrint', 'ticketText', 'airplaneSeat', 'boardingGroup', 'flightNumber', 220 | 'airline', 'operator', 'departure', 'boardingTime', 'departureTime', 'departureGate', 221 | 'departureTerminal', 'arrival', 'arrivalTime', 'arrivalGate', 'arrivalTerminal'] 222 | for k in all_keys: 223 | if k not in info: 224 | info[k] = '' 225 | return info 226 | 227 | def is_airport(v): 228 | if len(v) < 3: 229 | return False 230 | if any([n in v for n in '0123456789']): 231 | return False 232 | if len(v.split()) > 5: 233 | return False 234 | if v.startswith('('): 235 | return False 236 | invalid_symbols = ':@#' 237 | if any(s in v for s in invalid_symbols) or v.startswith('/'): 238 | return False 239 | return True 240 | 241 | stopwords = ['estimated', 'total', 242 | 'cabin: ', 'take this survey', 'callout panel', 'award miles', 243 | 'passengers', 'right column', 244 | 'foreign affairs', 'security', 'book now', 'see deal', 245 | 'enjoy', 'experience', 'entertainment', 'footer', 'awards', '®', 246 | 'twitter', ' your ', 'requires', 'approval', 'www', 247 | '@', '#', 248 | ] + ['january', 'february', 'march', 'april', 'july', 'august', 'september', 'october', 'november', 'december'] 249 | 250 | def is_flight(info): 251 | required_keys = ['departureTime', 'departure', 'arrivalTime', 'arrival'] 252 | logf = logging.getLogger('emailparser.is_flight') 253 | logf.info('checking if is a flight: %s' % info) 254 | if not all([k in info and info[k] != '' for k in required_keys]): 255 | return False 256 | for k in 'departure', 'arrival': 257 | v = info[k].lower() 258 | if v in ['manage flight', 'airport Lounges', 'total estimated cost', 'here']: 259 | return False 260 | if any([stopword in v for stopword in stopwords]): 261 | return False 262 | # if not is_airport(info[k]): 263 | # return False 264 | logf.info('yes, is a flight: %s' % info) 265 | return True 266 | 267 | def replace_booking_number(info, key, number): 268 | if key not in info or info[key] == number: 269 | info[key] = number 270 | return 271 | info[key] = info[key] + ', ' + number 272 | 273 | def parse_flight_info(columns, values): 274 | global_info = {} 275 | logf = logging.getLogger('emailparser.parse_flight_info') 276 | logf.debug('parsing row: %s %s' % (columns, [str(v)[:200] for v in values])) 277 | for c, v in zip(columns, values): 278 | number = v.text 279 | if c.lower() in ['eticket number', 'flight confirmation number', 'airline booking number', 'reservation number', 'código de reserva', 'código de reservación', 'buchungsnummer', 'pnr #']: 280 | logf.info('found ticketNumber key "%s" -> %s' % (c, number)) 281 | if is_airline_booking_number(number): 282 | replace_booking_number(global_info, 'ticketNumber', number) 283 | logf.info('ticketNumber <- %s' % number) 284 | for c, v in zip(columns, values): 285 | if c.lower() in ['eticket number', 'booking id', 'booking number', 'e-ticket #']: 286 | number = v.text 287 | logf.info('found booking number key "%s" -> %s' % (c, number)) 288 | if is_ticket_number(number) and 'bookingNumber' not in global_info: 289 | replace_booking_number(global_info, 'bookingNumber', number) 290 | global_info['bookingNumber'] = number 291 | logf.info('bookingNumber <- %s' % number) 292 | if is_airline_booking_number(number) and 'ticketNumber' not in global_info: 293 | replace_booking_number(global_info, 'ticketNumber', number) 294 | global_info['ticketNumber'] = number 295 | logf.info('ticketNumber <- %s' % number) 296 | if c.lower() in ['seats']: 297 | global_info['airplaneSeat'] = v.text 298 | if len(global_info) > 0: 299 | logf.info('learned new global info: %s' % global_info) 300 | return global_info 301 | 302 | def is_airline_booking_number(number): 303 | if len(number) != 6: 304 | return False 305 | for d in number: 306 | if d not in '0123456789QWERTZUIOPASDFGHJKLYXCVBNM': 307 | return False 308 | return True 309 | def is_ticket_number(number): 310 | if len(number) < 6 or len(number) > 20: 311 | return False 312 | for d in number: 313 | if d not in '0123456789QWERTZUIOPASDFGHJKLYXCVBNM': 314 | return False 315 | return True 316 | 317 | def default_logging(): 318 | logging.basicConfig(filename='emailparser.log',level=logging.DEBUG) 319 | logFormatter = logging.Formatter("[%(name)s %(levelname)s]: %(message)s") 320 | consoleHandler = logging.StreamHandler() 321 | consoleHandler.setFormatter(logFormatter) 322 | consoleHandler.setLevel(logging.INFO) 323 | logging.getLogger().addHandler(consoleHandler) 324 | 325 | def parse_email_html_message(t, languages = None, email_info={}): 326 | logf = logging.getLogger('emailparser.html') 327 | reservations = [] 328 | if 'schema.org' in t: 329 | logf.info('parsing with schema.org information') 330 | b = BeautifulSoup.BeautifulSoup(t) 331 | for fr in b.findAll('div', itemtype="http://schema.org/FlightReservation"): 332 | fl = fr.find(itemprop="reservationFor", itemtype="http://schema.org/Flight") 333 | if fl is None: 334 | logf.debug('no reservationFor element in %s' % fr) 335 | continue 336 | info = dict( 337 | reservationNumber = get_value(fr, 'meta', "reservationNumber", ''), 338 | checkinUrl = get_value(fr, 'link', "checkinUrl", ''), 339 | ticketNumber = get_value(fr, 'meta', "ticketNumber", ''), 340 | ticketDownload = get_value(fr, 'link', "ticketDownloadUrl", ''), 341 | ticketPrint = get_value(fr, 'link', "ticketPrintUrl", ''), 342 | ticketText = get_value(fr, 'meta', "additionalTicketText", ''), 343 | airplaneSeat = get_value(fr, 'meta', "airplaneSeat", ''), 344 | boardingGroup = get_value(fr, 'meta', "boardingGroup", ''), 345 | flightNumber = get_value(fl, 'meta', 'flightNumber', ''), 346 | airline = get_name(fl, 'airline', 'http://schema.org/Airline', ''), 347 | operator = get_name(fl, 'operatedBy', 'http://schema.org/Airline', ''), 348 | departure = get_code(fl, 'departureAirport', 'http://schema.org/Airport', ''), 349 | boardingTime = parsedate_simple(get_value(fl, 'meta', 'boardingTime', '')), 350 | departureTime = parsedate_simple(get_value(fl, 'meta', 'departureTime', '')), 351 | departureGate = get_value(fl, 'meta', 'departureGate', ''), 352 | departureTerminal = get_value(fl, 'meta', 'departureTerminal', ''), 353 | arrival = get_code(fl, 'arrivalAirport', 'http://schema.org/Airport', ''), 354 | arrivalTime = parsedate_simple(get_value(fl, 'meta', 'arrivalTime', '')), 355 | arrivalGate = get_value(fl, 'meta', 'arrivalGate', ''), 356 | arrivalTerminal = get_value(fl, 'meta', 'arrivalTerminal', '') 357 | ) 358 | if info['departureTime'] == '': 359 | logf.warn('skipping: not enough schema.org information, missing departureTime') 360 | logf.debug(str(info)) 361 | continue 362 | # add email subject and date 363 | info.update(email_info) 364 | reservations.append(info) 365 | 366 | if len(reservations) == 0: 367 | b = BeautifulSoup.BeautifulSoup(t) 368 | logf.info('parsing html email') 369 | global_info = dict(email_info) 370 | txt = b.text 371 | txtlower = txt.lower() 372 | 373 | for key, languages1 in ('flight confirmation number', ['en']), ('airline booking number', ['en']), ('confirmation', ['en']), ('digo de reserva', ['es']): 374 | if key in txtlower: 375 | logf.debug('found key: "%s"' % key) 376 | languages = languages1 377 | try: 378 | i = txtlower.index(key) + len(key) 379 | for number in txt[i:i+1000].split(':')[1:4:2]: 380 | logf.debug('parsing flight confirmation number: %s: %s' % (key, number)) 381 | number = number.strip()[:6] 382 | if is_airline_booking_number(number): 383 | global_info['ticketNumber'] = number 384 | logf.info('ticketNumber <- "%s"' % number) 385 | except Exception as e: 386 | logf.warn('parsing %s failed: %s' % (key, e)) 387 | for key, languages1 in ('booking id', ['en']), ('booking number', ['en']), ('buchungsnummer', ['de']): 388 | if key in txtlower: 389 | languages = languages1 390 | logf.debug('found key: "%s"' % key) 391 | try: 392 | i = txtlower.index(key) + len(key) 393 | number = txt[i:i+1000].split(':')[1] 394 | logf.debug('parsing booking number: %s: %s' % (key, number)) 395 | for j in range(len(number)): 396 | if number[j] not in '0123456789QWERTZUIOPASDFGHJKLYXCVBNM': 397 | break 398 | if j == 0: 399 | continue 400 | number = number[:j] 401 | if is_ticket_number(number) and 'bookingNumber' not in global_info: 402 | global_info['bookingNumber'] = number 403 | logf.info('bookingNumber <- "%s"' % number) 404 | if is_airline_booking_number(number) and 'ticketNumber' not in global_info: 405 | global_info['ticketNumber'] = number 406 | logf.info('ticketNumber <- "%s"' % number) 407 | except Exception as e: 408 | logf.warn('parsing %s failed: %s' % (key, e)) 409 | if languages is None: 410 | if any([v in txtlower for v in 'confirmacion', 'itinerario', 'viaje']): 411 | languages = ['es'] 412 | logf.info('language detected: es') 413 | if any([v in txtlower for v in 'buchung', 'rechnung', 'flugzeug', 'stunden', 'danke', 'vielen dank']): 414 | languages = ['de'] 415 | logf.info('language detected: de') 416 | if any([v in txtlower for v in 'passenger information', 'traveler information', 'departs', 'arrives', 'reconfirm', 'payment', 'itinerary', 'receipt', 'confirmation', 'thank you']): 417 | logf.info('language detected: en') 418 | languages = ['en'] 419 | if languages is None: 420 | logf.warn('language unsure') 421 | for table in b.findAll('table'): 422 | # make dictionaries for vertical tables 423 | header = [] 424 | #logf.debug('') 425 | #logf.debug('') 426 | #logf.debug('found table: %s' % (table)) 427 | #logf.debug('') 428 | #logf.debug('') 429 | rows = table.findChildren('tr') 430 | override_header = True 431 | for row in rows: 432 | # build header column 433 | newheader = [] 434 | for th in row.findChildren('th'): 435 | newheader.append(nicefy_htmltext(th.text.strip().strip(':'))) 436 | override_header = True 437 | if len(newheader) == 0: 438 | for td in row.findChildren('td'): 439 | # has h1,h2,h3,h4 as child? probably a header 440 | header_names = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] 441 | if any([c.name in header_names 442 | for c in td.recursiveChildGenerator() if hasattr(c, 'name')]): 443 | override_header = True 444 | newheader.append(nicefy_htmltext(td.text.strip().strip(':'))) 445 | if len(newheader) == 1 or all([len(h) > 100 for h in newheader]): 446 | newheader = [] 447 | if override_header: 448 | logf.debug('table header: %s' % newheader) 449 | if override_header and len(newheader) > 0 and any([len(h) > 0 for h in newheader]): 450 | header = newheader 451 | logf.debug('new header assigned') 452 | override_header = False 453 | continue 454 | if len(header) != 0: 455 | override_header = False 456 | # deal with content 457 | values = [] 458 | for td in row.findChildren('td'): 459 | values.append(td) 460 | 461 | if len(header) > 0: 462 | info = parse_flight(header, values, global_info, languages=languages) 463 | if is_flight(info): 464 | if info not in reservations: 465 | reservations.append(info) 466 | else: 467 | global_info.update(parse_flight_info(header, values)) 468 | else: 469 | logf.info('skipping row, no header found so far, in "%s"' % row) 470 | continue 471 | if len(reservations) == 0: 472 | for table in b.findAll('table'): 473 | # make dictionaries for vertical tables 474 | rows = table.findChildren('tr') 475 | for row in rows: 476 | values = row.findChildren('td') 477 | # could be this information: 478 | logf.info('no header, trying something with "%s"' % values) 479 | testheader = ['Day, date', 'Departure', 'Arrival', 'Flight'] 480 | info = parse_flight(testheader, values, global_info, languages=languages) 481 | if is_flight(info): 482 | if info not in reservations: 483 | reservations.append(info) 484 | if len(reservations) == 0: 485 | # try making bullet points out of all horizontal tables 486 | header = [] 487 | values = [] 488 | logf.info('horizontal parsing') 489 | info = {} 490 | for row in b.findAll('tr'): 491 | cells = row.findChildren('td') 492 | if len(cells) == 0: 493 | continue 494 | cellheaders = row.findChildren('th') 495 | logf.debug('learning from row: [%s] %s' % ( 496 | [c.text[:100] for c in cellheaders], 497 | [c.text[:100] for c in cells])) 498 | if len(cellheaders) == 0: 499 | cell = cells[0].text.strip() 500 | cells = cells[1:] 501 | else: 502 | cell = cellheaders[0].text.strip() 503 | if len(cells) > 0 and cell.endswith(':'): 504 | key = cell.rstrip(':').strip() 505 | for v in cells: 506 | if len(v.text) > 0 and len(v.text) < 100: 507 | logf.info('learned fact: %s = %s' % (key, v.text)) 508 | header.append(nicefy_htmltext(key)) 509 | values.append(v) 510 | elif ' to ' in cell and len(cell) < 150 and '.' not in cell and ' you ' not in cell and ' do ' not in cell: 511 | parts = cell.split(' to ') 512 | if len(parts) == 2: 513 | logf.info('learned from-to: %s' % parts) 514 | departure, arrival = parts 515 | info['departure'] = departure 516 | info['arrival'] = arrival 517 | else: 518 | logf.info('strange from-to: %s' % cell) 519 | elif ':' in cell and len(cell) < 150 and '.' not in cell and ' you ' not in cell and ' do ' not in cell: 520 | parts = cell.split(':') 521 | if len(parts) == 2: 522 | key, v = parts 523 | key, v = key.strip(), v.strip() 524 | v = BeautifulSoup.BeautifulSoup(v) 525 | logf.info('learned fact: %s = %s' % (key, v)) 526 | header.append(nicefy_htmltext(key)) 527 | values.append(v) 528 | else: 529 | logf.info('strange fact: %s' % cell) 530 | 531 | 532 | #for k, v in items: 533 | # logf.info('learned following fact: %s = %s' % (k, v)) 534 | logf.info('finding global info %s -> %s' % (header, [v.text for v in values])) 535 | global_info.update(parse_flight_info(header, values)) 536 | info.update(global_info) 537 | logf.info('finding flight info %s -> %s' % (header, [v.text for v in values])) 538 | info = parse_flight(header, values, info, languages=languages) 539 | if is_flight(info): 540 | if info not in reservations: 541 | reservations.append(info) 542 | for info in reservations: 543 | for datekey in ['departureTime', 'arrivalTime', 'boardingTime']: 544 | if info[datekey] != '': 545 | info[datekey + 'str'] = info[datekey].strftime('%Y-%m-%d %H:%M') 546 | else: 547 | info[datekey + 'str'] = '' 548 | return reservations 549 | 550 | def parse_email_message(m, languages = None): 551 | logf = logging.getLogger('emailparser') 552 | email_info = {} 553 | email_info['emailTime'] = datetime.datetime.fromtimestamp(m.get_date()) 554 | email_info['emailSubject'] = m.get_header('Subject') 555 | reservations = [] 556 | for mp in m.get_message_parts(): 557 | logf.info('Subject:' + m.get_header('Subject')) 558 | t = mp.get_payload(decode=True) 559 | if mp.get_content_type() == 'text/html': 560 | reservations += parse_email_html_message(t, languages=languages, email_info=email_info) 561 | elif mp.get_content_type() == 'text/plain': 562 | logf.debug('message: text/plain: %s' % t) 563 | else: 564 | logf.debug('message: other content type: "%s"' % mp.get_content_type()) 565 | return reservations 566 | 567 | def parse_multiple_email_messages(query_results, languages=None): 568 | all_reservations = [] 569 | import joblib 570 | call = joblib.delayed(parse_email_html_message) 571 | calls = [] 572 | messages = list(query_results) 573 | logf = logging.getLogger('emailparser') 574 | for i, m in enumerate(messages): 575 | logf.info('handling %d/%d: "%s" from %s' % (i, len(messages), m.get_header('Subject'), 576 | datetime.datetime.fromtimestamp(m.get_date()).strftime('%Y-%m-%d'))) 577 | email_info = {} 578 | email_info['emailTime'] = datetime.datetime.fromtimestamp(m.get_date()) 579 | email_info['emailSubject'] = m.get_header('Subject') 580 | email_info['emailId'] = m.get_message_id() 581 | for mp in m.get_message_parts(): 582 | t = mp.get_payload(decode=True) 583 | if mp.get_content_type() == 'text/html': 584 | calls.append(call(t, languages=languages, email_info=email_info)) 585 | elif mp.get_content_type() == 'text/plain': 586 | logf.debug('message: text/plain: %s' % t) 587 | else: 588 | logf.debug('message: other content type: %s' % mp.get_content_type()) 589 | 590 | logf.info('processing in parallel...') 591 | for reservations_part in joblib.Parallel(n_jobs=-1)(calls): 592 | all_reservations += reservations_part 593 | logf.info('%d reservations' % len(all_reservations)) 594 | return all_reservations 595 | 596 | 597 | -------------------------------------------------------------------------------- /summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Flight summary 5 | 6 | 7 | 8 |

Flights

9 |
%d days later 115 |
  121 |
144 |
From
145 | %(departure)s 146 | %(departureTerminal)s 147 | %(departureGate)s 148 |
✈ 149 | 150 |
Destination
151 | %(arrival)s 152 | %(arrivalTerminal)s 153 | %(arrivalGate)s 154 |
158 |
Depart
159 | %(departureJustTime)s 160 |
161 |
Date
162 | %(departureDay)s 163 |
167 |
Arriving
168 | %(arrivalTimestr)s 169 |
Flight number
170 | Flight number %(flightNumber)s with %(airline)s%(operator)s 171 |
Ticket
172 | %(ticketNumber)s %(ticketText)s %(ticketDownload)s %(ticketPrint)s 173 |
%(boardingTime)s
174 |
%(pdffiles)s
175 |
10 | 11 | 12 | 14 | 15 | 27 | 28 | 29 | 36 | 37 | 38 | 48 | 49 | 50 | 54 | 55 | 56 | 58 | 59 | 71 | 72 | 73 | 80 | 81 | 82 | 92 | 93 | 94 | 98 | 99 | 100 | 102 | 103 | 115 | 116 | 117 | 124 | 125 | 126 | 136 | 137 | 138 | 142 | 143 | 144 | 146 | 147 | 159 | 160 | 161 | 168 | 169 | 170 | 180 | 181 | 182 | 186 | 187 | 188 | 190 | 191 | 203 | 204 | 205 | 212 | 213 | 214 | 224 | 225 | 226 | 230 | 231 | 232 | 234 | 235 | 247 | 248 | 249 | 256 | 257 | 258 | 268 | 269 | 270 | 274 | 275 | 276 | 278 | 279 | 291 | 292 | 293 | 300 | 301 | 302 | 312 | 313 | 314 | 318 | 319 | 320 | 322 | 323 | 335 | 336 | 337 | 344 | 345 | 346 | 356 | 357 | 358 | 362 | 363 | 364 | 366 | 367 | 379 | 380 | 381 | 388 | 389 | 390 | 400 | 401 | 402 | 406 | 407 | 408 | 410 | 411 | 423 | 424 | 425 | 432 | 433 | 434 | 444 | 445 | 446 | 450 | 451 | 452 | 454 | 455 | 467 | 468 | 469 | 476 | 477 | 478 | 488 | 489 | 490 | 494 | 495 | 496 | 498 | 499 | 511 | 512 | 513 | 520 | 521 | 522 | 532 | 533 | 534 | 538 | 539 | 540 | 542 | 543 | 555 | 556 | 557 | 564 | 565 | 566 | 576 | 577 | 578 | 582 | 583 | 584 | 586 | 587 | 599 | 600 | 601 | 608 | 609 | 610 | 620 | 621 | 622 | 626 | 627 | 628 | 630 | 631 | 643 | 644 | 645 | 652 | 653 | 654 | 664 | 665 | 666 | 670 | 671 | 672 | 674 | 675 | 687 | 688 | 689 | 696 | 697 | 698 | 708 | 709 | 710 | 714 | 715 | 716 | 718 | 719 | 731 | 732 | 733 | 740 | 741 | 742 | 752 | 753 | 754 | 758 | 759 | 760 | 762 | 763 | 775 | 776 | 777 | 784 | 785 | 786 | 796 | 797 | 798 | 802 | 803 | 804 | 806 | 807 | 819 | 820 | 821 | 828 | 829 | 830 | 840 | 841 | 842 | 846 | 847 | 848 | 850 | 851 | 863 | 864 | 865 | 872 | 873 | 874 | 884 | 885 | 886 | 890 | 891 | 892 | 894 | 895 | 907 | 908 | 909 | 916 | 917 | 918 | 928 | 929 | 930 | 934 | 935 | 936 | 938 | 939 | 951 | 952 | 953 | 960 | 961 | 962 | 972 | 973 | 974 | 978 | 979 | 980 | 982 | 983 | 995 | 996 | 997 | 1004 | 1005 | 1006 | 1016 | 1017 | 1018 | 1022 | 1023 | 1024 | 1026 | 1027 | 1039 | 1040 | 1041 | 1048 | 1049 | 1050 | 1060 | 1061 | 1062 | 1066 | 1067 | 1068 | 1070 | 1071 | 1083 | 1084 | 1085 | 1092 | 1093 | 1094 | 1104 | 1105 | 1106 | 1110 | 1111 | 1112 | 1114 | 1115 | 1127 | 1128 | 1129 | 1136 | 1137 | 1138 | 1148 | 1149 | 1150 | 1154 | 1155 | 1156 | 1158 | 1159 | 1171 | 1172 | 1173 | 1180 | 1181 | 1182 | 1192 | 1193 | 1194 | 1198 | 1199 | 1200 | 1202 | 1203 | 1215 | 1216 | 1217 | 1224 | 1225 | 1226 | 1236 | 1237 | 1238 | 1242 | 1243 | 1244 | 1246 | 1247 | 1259 | 1260 | 1261 | 1268 | 1269 | 1270 | 1280 | 1281 | 1282 | 1286 | 1287 | 1288 | 1290 | 1291 | 1303 | 1304 | 1305 | 1312 | 1313 | 1314 | 1324 | 1325 | 1326 | 1330 | 1331 | 1332 | 1334 | 1335 | 1347 | 1348 | 1349 | 1356 | 1357 | 1358 | 1368 | 1369 | 1370 | 1374 | 1375 | 1376 | 1378 | 1379 | 1391 | 1392 | 1393 | 1400 | 1401 | 1402 | 1412 | 1413 | 1414 | 1418 | 1419 | 1420 | 1422 | 1423 | 1435 | 1436 | 1437 | 1444 | 1445 | 1446 | 1456 | 1457 | 1458 | 1462 | 1463 | 1464 | 1466 | 1467 | 1479 | 1480 | 1481 | 1488 | 1489 | 1490 | 1500 | 1501 | 1502 | 1506 | 1507 | 1508 | 1510 | 1511 | 1523 | 1524 | 1525 | 1532 | 1533 | 1534 | 1544 | 1545 | 1546 | 1550 | 1551 | 1552 | 1554 | 1555 | 1567 | 1568 | 1569 | 1576 | 1577 | 1578 | 1588 | 1589 | 1590 | 1594 | 1595 | 1596 | 1598 | 1599 | 1611 | 1612 | 1613 | 1620 | 1621 | 1622 | 1632 | 1633 | 1634 | 1638 | 1639 | 1640 | 1642 | 1643 | 1655 | 1656 | 1657 | 1664 | 1665 | 1666 | 1676 | 1677 | 1678 | 1682 | 1683 | 1684 | 1686 | 1687 | 1699 | 1700 | 1701 | 1708 | 1709 | 1710 | 1720 | 1721 | 1722 | 1726 | 1727 | 1728 | 1730 | 1731 | 1743 | 1744 | 1745 | 1752 | 1753 | 1754 | 1764 | 1765 | 1766 | 1770 | 1771 | 1772 | 1774 | 1775 | 1787 | 1788 | 1789 | 1796 | 1797 | 1798 | 1808 | 1809 | 1810 | 1814 | 1815 | 1816 | 1818 | 1819 | 1831 | 1832 | 1833 | 1840 | 1841 | 1842 | 1852 | 1853 | 1854 | 1858 | 1859 | 1860 | 1862 | 1863 | 1875 | 1876 | 1877 | 1884 | 1885 | 1886 | 1896 | 1897 | 1898 | 1902 | 1903 | 1904 | 1906 | 1907 | 1919 | 1920 | 1921 | 1928 | 1929 | 1930 | 1940 | 1941 | 1942 | 1946 | 1947 | 1948 | 1950 | 1951 | 1963 | 1964 | 1965 | 1972 | 1973 | 1974 | 1984 | 1985 | 1986 | 1990 | 1991 | 1992 | 1994 | 1995 | 2007 | 2008 | 2009 | 2016 | 2017 | 2018 | 2028 | 2029 | 2030 | 2034 | -------------------------------------------------------------------------------- /mb2md-3.20.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # 3 | # $Id: mb2md.pl,v 1.26 2004/03/28 00:09:46 juri Exp $ 4 | # 5 | # mb2md-3.20.pl Converts Mbox mailboxes to Maildir format. 6 | # 7 | # Public domain. 8 | # 9 | # currently maintained by: 10 | # Juri Haberland 11 | # initially wrote by: 12 | # Robin Whittle 13 | # 14 | # This script's web abode is http://batleth.sapienti-sat.org/projects/mb2md/ . 15 | # For a changelog see http://batleth.sapienti-sat.org/projects/mb2md/changelog.txt 16 | # 17 | # The Mbox -> Maildir inner loop is based on qmail's script mbox2maildir, which 18 | # was kludged by Ivan Kohler in 1997 from convertandcreate (public domain) 19 | # by Russel Nelson. Both these convert a single mailspool file. 20 | # 21 | # The qmail distribution has a maildir2mbox.c program. 22 | # 23 | # What is does: 24 | # ============= 25 | # 26 | # Reads a directory full of Mbox format mailboxes and creates a set of 27 | # Maildir format mailboxes. Some details of this are to suit Courier 28 | # IMAP's naming conventions for Maildir mailboxes. 29 | # 30 | # http://www.inter7.com/courierimap/ 31 | # 32 | # This is intended to automate the conversion of the old 33 | # /var/spool/mail/blah file - with one call of this script - and to 34 | # convert one or more mailboxes in a specifed directory with separate 35 | # calls with other command line arguments. 36 | # 37 | # Run this as the user - in these examples "blah". 38 | 39 | # This version supports conversion of: 40 | # 41 | # Date The date-time in the "From " line of the message in the 42 | # Mbox format is the date when the message was *received*. 43 | # This is transformed into the date-time of the file which 44 | # contains the message in the Maildir mailbox. 45 | # 46 | # This relies on the Date::Parse perl module and the utime 47 | # perl function. 48 | # 49 | # The script tries to cope with errant forms of the 50 | # Mbox "From " line which it may encounter, but if 51 | # there is something really screwy in a From line, 52 | # then perhaps the script will fail when "touch" 53 | # is given an invalid date. Please report the 54 | # exact nature of any such "From " line! 55 | # 56 | # 57 | # Flagged 58 | # Replied 59 | # Read = Seen 60 | # Tagged for Deletion 61 | # 62 | # In the Mbox message, flags for these are found in the 63 | # "Status: N" or "X-Status: N" headers, where "N" is 0 64 | # or more of the following characters in the left column. 65 | # 66 | # They are converted to characters in the right column, 67 | # which become the last characters of the file name, 68 | # following the ":2," which indicates IMAP message status. 69 | # 70 | # 71 | # F -> F Flagged 72 | # A -> R Replied 73 | # R -> S Read = Seen 74 | # D -> T Tagged for Deletion (Trash) 75 | # 76 | # This is based on the work of Philip Mak who wrote a 77 | # completely separate Mbox -> Maildir converter called 78 | # perfect_maildir and posted it to the Mutt-users mailing 79 | # list on 25 December 2001: 80 | # 81 | # http://www.mail-archive.com/mutt-users@mutt.org/msg21872.html 82 | # 83 | # Michael Best originally integrated those changes into mb2md. 84 | # 85 | # 86 | # In addition, the names of the message files in the Maildir are of a 87 | # regular length and are of the form: 88 | # 89 | # 7654321.000123.mbox:2,xxx 90 | # 91 | # Where "7654321" is the Unix time in seconds when the script was 92 | # run and "000123" is the six zeroes padded message number as 93 | # messages are converted from the Mbox file. "xxx" represents zero or 94 | # more of the above flags F, R, S or T. 95 | # 96 | # 97 | # --------------------------------------------------------------------- 98 | # 99 | # 100 | # USAGE 101 | # ===== 102 | # 103 | # Run this as the user of the mailboxes, not as root. 104 | # 105 | # 106 | # mb2md -h 107 | # mb2md [-c] -m [-d destdir] 108 | # mb2md [-c] -s sourcefile [-d destdir] 109 | # mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension] 110 | # 111 | # -c use the Content-Length: headers (if present) to find the 112 | # beginning of the next message 113 | # Use with caution! Results may be unreliable. I recommend to do 114 | # a run without "-c" first and only use it if you are certain, 115 | # that the mbox in question really needs the "-c" option 116 | # 117 | # -m If this is used then the source will 118 | # be the single mailbox at /var/spool/mail/blah for 119 | # user blah and the destination mailbox will be the 120 | # "destdir" mailbox itself. 121 | # 122 | # 123 | # -s source Directory or file relative to the user's home directory, 124 | # which is where the the "somefolders" directories are located. 125 | # Or if starting with a "/" it is taken as a 126 | # absolute path, e.g. /mnt/oldmail/user 127 | # 128 | # or 129 | # 130 | # A single mbox file which will be converted to 131 | # the destdir. 132 | # 133 | # -R If defined, do not skip directories found in a mailbox 134 | # directory, but runs recursively into each of them, 135 | # creating all wanted folders in Maildir. 136 | # Incompatible with '-f' 137 | # 138 | # -f somefolder Directories, relative to "sourcedir" where the Mbox files 139 | # are. All mailboxes in the "sourcedir" 140 | # directory will be converted and placed in the 141 | # "destdir" directory. (Typically the Inbox directory 142 | # which in this instance is also functioning as a 143 | # folder for other mailboxes.) 144 | # 145 | # The "somefolder" directory 146 | # name will be encoded into the new mailboxes' names. 147 | # See the examples below. 148 | # 149 | # This does not save an UW IMAP dummy message file 150 | # at the start of the Mbox file. Small changes 151 | # in the code could adapt it for looking for 152 | # other distinctive patterns of dummy messages too. 153 | # 154 | # Don't let the source directory you give as "somefolders" 155 | # contain any "."s in its name, unless you want to 156 | # create subfolders from the IMAP user's point of 157 | # view. See the example below. 158 | # 159 | # Incompatible with '-f' 160 | # 161 | # 162 | # -d destdir Directory where the Maildir format directories will be created. 163 | # If not given, then the destination will be ~/Maildir . 164 | # Typically, this is what the IMAP server sees as the 165 | # Inbox and the folder for all user mailboxes. 166 | # If this begins with a '/' the path is considered to be 167 | # absolute, otherwise it is relative to the users 168 | # home directory. 169 | # 170 | # -r strip_ext If defined this extension will be stripped from 171 | # the original mailbox file name before creating 172 | # the corresponding maildir. The extension must be 173 | # given without the leading dot ("."). See the example below. 174 | # 175 | # -l WU-file File containing the list of subscribed folders. If 176 | # migrating from WU-IMAP the list of subscribed folders will 177 | # be found in the file called .mailboxlist in the users 178 | # home directory. This will convert all subscribed folders 179 | # for a single user: 180 | # /bin/mb2md -s mail -l .mailboxlist -R -d Maildir 181 | # and for all users in a directory as root you can do the 182 | # following: 183 | # for i in *; do echo $i;su - $i -c "/bin/mb2md -s mail -l .mailboxlist -R -d Maildir";done 184 | # 185 | # 186 | # Example 187 | # ======= 188 | # 189 | # We have a bunch of directories of Mbox mailboxes located at 190 | # /home/blah/oldmail/ 191 | # 192 | # /home/blah/oldmail/fffff 193 | # /home/blah/oldmail/ggggg 194 | # /home/blah/oldmail/xxx/aaaa 195 | # /home/blah/oldmail/xxx/bbbb 196 | # /home/blah/oldmail/xxx/cccc 197 | # /home/blah/oldmail/xxx/dddd 198 | # /home/blah/oldmail/yyyy/huey 199 | # /home/blah/oldmail/yyyy/duey 200 | # /home/blah/oldmail/yyyy/louie 201 | # 202 | # With the UW IMAP server, fffff and ggggg would have appeared in the root 203 | # of this mail server, along with the Inbox. aaaa, bbbb etc, would have 204 | # appeared in a folder called xxx from that root, and xxx was just a folder 205 | # not a mailbox for storing messages. 206 | # 207 | # We also have the mailspool Inbox at: 208 | # 209 | # /var/spool/mail/blah 210 | # 211 | # 212 | # To convert these, as user blah, we give the first command: 213 | # 214 | # mb2md -m 215 | # 216 | # The main Maildir directory will be created if it does not exist. 217 | # (This is true of any argument options, not just "-m".) 218 | # 219 | # /home/blah/Maildir/ 220 | # 221 | # It has the following subdirectories: 222 | # 223 | # /home/blah/Maildir/tmp/ 224 | # /home/blah/Maildir/new/ 225 | # /home/blah/Maildir/cur/ 226 | # 227 | # Then /var/spool/blah file is read, split into individual files and 228 | # written into /home/blah/Maildir/cur/ . 229 | # 230 | # Now we give the second command: 231 | # 232 | # mb2md -s oldmail -R 233 | # 234 | # This reads recursively all Mbox mailboxes and creates: 235 | # 236 | # /home/blah/Maildir/.fffff/ 237 | # /home/blah/Maildir/.ggggg/ 238 | # /home/blah/Maildir/.xxx/ 239 | # /home/blah/Maildir/.xxx.aaaa/ 240 | # /home/blah/Maildir/.xxx.bbbb/ 241 | # /home/blah/Maildir/.xxx.cccc/ 242 | # /home/blah/Maildir/.xxx.aaaa/ 243 | # /home/blah/Maildir/.yyyy/ 244 | # /home/blah/Maildir/.yyyy.huey/ 245 | # /home/blah/Maildir/.yyyy.duey/ 246 | # /home/blah/Maildir/.yyyy.louie/ 247 | # 248 | # The result, from the IMAP client's point of view is: 249 | # 250 | # Inbox ----------------- 251 | # | 252 | # | fffff ----------- 253 | # | ggggg ----------- 254 | # | 255 | # - xxx ------------- 256 | # | | aaaa -------- 257 | # | | bbbb -------- 258 | # | | cccc -------- 259 | # | | dddd -------- 260 | # | 261 | # - yyyy ------------ 262 | # | huey ------- 263 | # | duey ------- 264 | # | louie ------ 265 | # 266 | # Note that although ~/Maildir/.xxx/ and ~/Maildir/.yyyy may appear 267 | # as folders to the IMAP client the above commands to not generate 268 | # any Maildir folders of these names. These are simply elements 269 | # of the names of other Maildir directories. (if you used '-R', they 270 | # whill be able to act as normal folders, containing messages AND folders) 271 | # 272 | # With a separate run of this script, using just the "-s" option 273 | # without "-f" nor "-R", it would be possible to create mailboxes which 274 | # appear at the same location as far as the IMAP client is 275 | # concerned. By having Mbox mailboxes in some directory: 276 | # ~/oldmail/nnn/ of the form: 277 | # 278 | # /home/blah/oldmail/nn/xxxx 279 | # /home/blah/oldmail/nn/yyyyy 280 | # 281 | # then the command: 282 | # 283 | # mb2md -s oldmail/nn 284 | # 285 | # will create two new Maildirs: 286 | # 287 | # /home/blah/Maildir/.xxx/ 288 | # /home/blah/Maildir/.yyyy/ 289 | # 290 | # Then what used to be the xxx and yyyy folders now function as 291 | # mailboxes too. Netscape 4.77 needed to be put to sleep and given ECT 292 | # to recognise this - deleting the contents of (Win2k example): 293 | # 294 | # C:\Program Files\Netscape\Users\uu\ImapMail\aaa.bbb.ccc\ 295 | # 296 | # where "uu" is the user and "aaa.bbb.ccc" is the IMAP server 297 | # 298 | # I often find that deleting all this directory's contents, except 299 | # "rules.dat", forces Netscape back to reality after its IMAP innards 300 | # have become twisted. Then maybe use File > Subscribe - but this 301 | # seems incapable of subscribing to folders. 302 | # 303 | # For Outlook Express, select the mail server, then click the 304 | # "IMAP Folders" button and use "Reset list". In the "All" 305 | # window, select the mailboxes you want to see in normal 306 | # usage. 307 | # 308 | # 309 | # This script did not recurse subdirectories or delete old mailboxes, before addition of the '-R' parameter :) 310 | # 311 | # Be sure not to be accessing the Mbox mailboxes while running this 312 | # script. It does not attempt to lock them. Likewise, don't run two 313 | # copies of this script either. 314 | # 315 | # 316 | # Trickier usage . . . 317 | # ==================== 318 | # 319 | # If you have a bunch of mailboxes in a directory ~/oldmail/doors/ 320 | # and you want them to appear in folders such as: 321 | # 322 | # ~/Maildir/.music.bands.doors.Jim 323 | # ~/Maildir/.music.bands.doors.John 324 | # 325 | # etc. so they appear in an IMAP folder: 326 | # 327 | # Inbox ----------------- 328 | # | music 329 | # | bands 330 | # | doors 331 | # | Jim 332 | # | John 333 | # | Robbie 334 | # | Ray 335 | # 336 | # Then you could rename the source directory to: 337 | # 338 | # ~/oldmail/music.bands.doors/ 339 | # 340 | # then use: 341 | # 342 | # mb2md -s oldmail -f music.bands.doors 343 | # 344 | # 345 | # Or simply use '-R' switch with: 346 | # mb2md -s oldmail -R 347 | # 348 | # 349 | # Stripping mailbox extensions: 350 | # ============================= 351 | # 352 | # If you want to convert mailboxes that came for example from 353 | # a Windows box than you might want to strip the extension of 354 | # the mailbox name so that it won't create a subfolder in your 355 | # mail clients view. 356 | # 357 | # Example: 358 | # You have several mailboxes named Trash.mbx, Sent.mbx, Drafts.mbx 359 | # If you don't strip the extension "mbx" you will get the following 360 | # hierarchy: 361 | # 362 | # Inbox 363 | # | 364 | # - Trash 365 | # | | mbx 366 | # | 367 | # - Sent 368 | # | | mbx 369 | # | 370 | # - Drafts 371 | # | mbx 372 | # 373 | # This is more than ugly! 374 | # Just use: 375 | # mb2md -s oldmail -r mbx 376 | # 377 | # Note: don't specify the dot! It will be stripped off 378 | # automagically ;) 379 | # 380 | #------------------------------------------------------------------------------ 381 | 382 | 383 | use strict; 384 | use Getopt::Std; 385 | use Date::Parse; 386 | use IO::Handle; 387 | use Fcntl; 388 | 389 | # print the usage message 390 | sub usage() { 391 | print "Usage:\n"; 392 | print " mb2md -h\n"; 393 | print " mb2md [-c] -m [-d destdir]\n"; 394 | print " mb2md [-c] -s sourcefile [-d destdir]\n"; 395 | die " mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension]\n"; 396 | } 397 | # get options 398 | my %opts; 399 | getopts('d:f:chms:r:l:R', \%opts) || usage(); 400 | usage() if ( defined($opts{h}) 401 | || (!defined($opts{m}) && !defined($opts{s})) ); 402 | 403 | # Get uid, username and home dir 404 | my ($name, $passwd, $uid, $gid, $quota, $comment, $gcos, $homedir, $shell) = getpwuid($<); 405 | 406 | # Get arguments and determine source 407 | # and target directories. 408 | my $mbroot = undef; # this is the base directory for the mboxes 409 | my $mbdir = undef; # this is an mbox dir relative to the $mbroot 410 | my $mbfile = undef; # this is an mbox file 411 | my $dest = undef; 412 | my $strip_ext = undef; 413 | my $use_cl = undef; # defines whether we use the Content-Length: header if present 414 | 415 | # if option "-c" is given, we use the Content-Length: header if present 416 | # dangerous! may be unreliable, as the whole CL stuff is a bad idea 417 | if (defined($opts{c})) 418 | { 419 | $use_cl = 1; 420 | } else { 421 | $use_cl = 0; 422 | } 423 | 424 | # first, if the user has gone the -m option 425 | # we simply convert their mailfile 426 | if (defined($opts{m})) 427 | { 428 | if (defined($ENV{'MAIL'})) { 429 | $mbfile = $ENV{'MAIL'}; 430 | } elsif ( -f "/var/spool/mail/$name" ) { 431 | $mbfile = "/var/spool/mail/$name" 432 | } elsif ( -f "/var/mail/$name" ) { 433 | $mbfile = "/var/mail/$name" 434 | } else { 435 | die("I searched \$MAIL, /var/spool/mail/$name and /var/mail/$name, ". 436 | "but I couldn't find your mail spool file - "); 437 | } 438 | } 439 | # see if the user has specified a source directory 440 | elsif (defined($opts{s})) 441 | { 442 | # if opts{s} doesn't start with a "/" then 443 | # it is a subdir of the users $home 444 | # if it does start with a "/" then 445 | # let's take $mbroot as a absolut path 446 | $opts{s} = "$homedir/$opts{s}" if ($opts{s} !~ /^\//); 447 | 448 | # check if the given source is a mbox file 449 | if (-f $opts{s}) 450 | { 451 | $mbfile = $opts{s}; 452 | } 453 | 454 | # otherwise check if it is a directory 455 | elsif (-d $opts{s}) 456 | { 457 | $mbroot = $opts{s}; 458 | # get rid of trailing /'s 459 | $mbroot =~ s/\/$//; 460 | 461 | # check if we have a specified sub directory, 462 | # otherwise the sub directory is '.' 463 | if (defined($opts{f})) 464 | { 465 | $mbdir = $opts{f}; 466 | # get rid of trailing /'s 467 | $mbdir =~ s/\/$//; 468 | } 469 | } 470 | 471 | # otherwise we have an error 472 | else 473 | { 474 | die("Fatal: Source is not an mbox file or a directory!\n"); 475 | } 476 | } 477 | 478 | 479 | # get the dest 480 | defined($opts{d}) && ($dest = $opts{d}) || ($dest = "Maildir"); 481 | # see if we have anything to strip 482 | defined($opts{r}) && ($strip_ext = $opts{r}); 483 | # No '-f' with '-R' 484 | if((defined($opts{R}))&&(defined($opts{f}))) { die "No recursion with \"-f\"";} 485 | # Get list of folders 486 | my @flist; 487 | if(defined($opts{l})) 488 | { 489 | open (LIST,$opts{l}) or die "Could not open mailbox list $opts{l}: $!"; 490 | @flist=; 491 | close LIST; 492 | } 493 | 494 | # if the destination is relative to the home dir, 495 | # check that the home dir exists 496 | die("Fatal: home dir $homedir doesn't exist.\n") if ($dest !~ /^\// && ! -e $homedir); 497 | 498 | # 499 | # form the destination value 500 | # slap the home dir on the front of the dest if the dest does not begin 501 | # with a '/' 502 | $dest = "$homedir/$dest" if ($dest !~ /^\//); 503 | # get rid of trailing /'s 504 | $dest =~ s/\/$//; 505 | 506 | 507 | # Count the number of mailboxes, or 508 | # at least files, we found. 509 | my $mailboxcount = 0; 510 | 511 | # Since we'll be making sub directories of the main 512 | # Maildir, we need to make sure that the main maildir 513 | # exists 514 | &maildirmake($dest); 515 | 516 | # Now we do different things depending on whether we convert one mbox 517 | # file or a directory of mbox files 518 | if (defined($mbfile)) 519 | { 520 | if (!isamailboxfile($mbfile)) 521 | { 522 | print "Skipping $mbfile: not a mbox file\n"; 523 | } 524 | else 525 | { 526 | print "Converting $mbfile to maildir: $dest\n"; 527 | # this is easy, we just run the convert function 528 | &convert($mbfile, $dest); 529 | } 530 | } 531 | # if '-f' was used ... 532 | elsif (defined($mbdir)) 533 | { 534 | print "Converting mboxdir/mbdir: $mbroot/$mbdir to maildir: $dest/\n"; 535 | 536 | # Now set our source directory 537 | my $sourcedir = "$mbroot/$mbdir"; 538 | 539 | # check that the directory we are supposed to be finding mbox 540 | # files in, exists and is a directory 541 | -e $sourcedir or die("Fatal: MBDIR directory $sourcedir/ does not exist.\n"); 542 | -d $sourcedir or die("Fatal: MBDIR $sourcedir is not a directory.\n"); 543 | 544 | 545 | &convertit($mbdir,""); 546 | } 547 | # Else, let's work in $mbroot 548 | else 549 | { 550 | opendir(SDIR, $mbroot) 551 | or die("Fatal: Cannot open source directory $mbroot/ \n"); 552 | 553 | 554 | while (my $sourcefile = readdir(SDIR)) 555 | { 556 | if (-d "$mbroot/$sourcefile") { 557 | # Recurse only if requested (to be changed ?) 558 | if (defined($opts{R})) { 559 | print "convertit($sourcefile,\"\")\n"; 560 | &convertit($sourcefile,""); 561 | } else { 562 | print("$sourcefile is a directory, but '-R' was not used... skipping\n"); 563 | } 564 | } 565 | elsif (!-f "$mbroot/$sourcefile") 566 | { 567 | print "Skipping $mbroot/$sourcefile : not a file nor a dir\n"; 568 | next; 569 | } 570 | elsif (!isamailboxfile("$mbroot/$sourcefile")) 571 | { 572 | print "Skipping $mbroot/$sourcefile : not a mbox file\n"; 573 | next; 574 | } 575 | else 576 | { 577 | &convertit($sourcefile,""); 578 | } 579 | } # end of "while ($sfile = readdir(SDIR))" loop. 580 | closedir(SDIR); 581 | printf("$mailboxcount files processed.\n"); 582 | } 583 | # 584 | 585 | exit 0; 586 | 587 | # My debbugging placeholder I can put somewhere to show how far the script ran. 588 | # die("So far so good.\n\n"); 589 | 590 | # The isamailboxfile function 591 | # ---------------------- 592 | # 593 | # Here we check if the file is a mailbox file, not an address-book or 594 | # something else. 595 | # If file is empty, we say it is a mbox, to create it empty. 596 | # 597 | # Returns 1 if file is said mbox, 0 else. 598 | sub isamailboxfile { 599 | my ($mbxfile) = @_; 600 | return 1 if(-z $mbxfile); 601 | sysopen(MBXFILE, "$mbxfile", O_RDONLY) or die "Could not open $mbxfile ! \n"; 602 | while() { 603 | if (/^From/) { 604 | close(MBXFILE); 605 | return 1; 606 | } 607 | else { 608 | close(MBXFILE); 609 | return 0; 610 | } 611 | } 612 | } 613 | 614 | # The convertit function 615 | # ----------------------- 616 | # 617 | # This function creates all subdirs in maildir, and calls convert() 618 | # for each mbox file. 619 | # Yes, it becomes the 'main loop' :) 620 | sub convertit 621 | { 622 | # Get subdir as argument 623 | my ($dir,$oldpath) = @_; 624 | 625 | $oldpath =~ s/\/\///; 626 | 627 | # Skip files beginning with '.' since they are 628 | # not normally mbox files nor dirs (includes '.' and '..') 629 | if ($dir =~ /^\./) 630 | { 631 | print "Skipping $dir : name begins with a '.'\n"; 632 | return; 633 | } 634 | my $destinationdir = $dir; 635 | my $temppath = $oldpath; 636 | 637 | # We don't want to have .'s in the $targetfile file 638 | # name because they will become directories in the 639 | # Maildir. Therefore we convert them to _'s 640 | $temppath =~ s/\./\_/g; 641 | $destinationdir =~ s/\./\_/g; 642 | 643 | # Appending $oldpath => path is only missing $dest 644 | $destinationdir = "$temppath.$destinationdir"; 645 | 646 | # Converting '/' to '.' in $destinationdir 647 | $destinationdir =~s/\/+/\./g; 648 | 649 | # source dir 650 | my $srcdir="$mbroot/$oldpath/$dir"; 651 | 652 | printf("convertit(): Converting $dir in $mbroot/$oldpath to $dest/$destinationdir\n"); 653 | &maildirmake("$dest/$destinationdir"); 654 | print("destination = $destinationdir\n"); 655 | if (-d $srcdir) { 656 | opendir(SUBDIR, "$srcdir") or die "can't open $srcdir !\n"; 657 | my @subdirlist=readdir(SUBDIR); 658 | closedir(SUBDIR); 659 | foreach (@subdirlist) { 660 | next if (/^\.+$/); 661 | print("Sub: $_\n"); 662 | print("convertit($_,\"$oldpath/$dir\")\n"); 663 | &convertit($_,"$oldpath/$dir"); 664 | } 665 | } else { 666 | # Source file verifs .... 667 | # 668 | return if(defined($opts{l}) && !inlist("$oldpath/$dir",@flist)); 669 | 670 | if (!isamailboxfile("$mbroot/$oldpath/$dir")) 671 | { 672 | print "Skipping $dir (is not mbox)\n"; 673 | next; 674 | } 675 | 676 | # target file verifs... 677 | # 678 | # if $strip_extension is defined, 679 | # strip it off the $targetfile 680 | defined($strip_ext) && ($destinationdir =~ s/\.$strip_ext$//); 681 | &convert("$mbroot/$oldpath/$dir","$dest/$destinationdir"); 682 | $mailboxcount++; 683 | } 684 | } 685 | # The maildirmake function 686 | # ------------------------ 687 | # 688 | # It does the same thing that the maildirmake binary that 689 | # comes with courier-imap distribution 690 | # 691 | sub maildirmake 692 | { 693 | foreach(@_) { 694 | -d $_ or mkdir $_,0700 or die("Fatal: Directory $_ doesn't exist and can't be created.\n"); 695 | 696 | -d "$_/tmp" or mkdir("$_/tmp",0700) or die("Fatal: Unable to make $_/tmp/ subdirectory.\n"); 697 | -d "$_/new" or mkdir("$_/new",0700) or die("Fatal: Unable to make $_/new/ subdirectory.\n"); 698 | -d "$_/cur" or mkdir("$_/cur",0700) or die("Fatal: Unable to make $_/cur/ subdirectory.\n"); 699 | } 700 | } 701 | 702 | # The inlist function 703 | # ------------------------ 704 | # 705 | # It checks that the folder to be converted is in the list of subscribed 706 | # folders in WU-IMAP 707 | # 708 | sub inlist 709 | { 710 | my ($file,@flist) = @_; 711 | my $valid = 0; 712 | # Get rid of the first / if any 713 | $file =~ s/^\///; 714 | foreach my $folder (@flist) { 715 | chomp $folder; 716 | if ($file eq $folder) { 717 | $valid = 1; 718 | last; 719 | } 720 | } 721 | if (!$valid) { 722 | print "$file is not in list\n"; 723 | } 724 | else { 725 | print "$file is in list\n"; 726 | } 727 | 728 | return $valid; 729 | } 730 | 731 | # 732 | 733 | # The convert function 734 | # --------------------- 735 | # 736 | # This function does the down and dirty work of 737 | # actually converting the mbox to a maildir 738 | # 739 | sub convert 740 | { 741 | # get the source and destination as arguments 742 | my ($mbox, $maildir) = @_; 743 | 744 | printf("Source Mbox is $mbox\n"); 745 | printf("Target Maildir is $maildir \n") ; 746 | 747 | # create the directories for the new maildir 748 | # 749 | # if it is the root maildir (ie. converting the inbox) 750 | # these already exist but thats not a big issue 751 | 752 | &maildirmake($maildir); 753 | 754 | # Change to the target mailbox directory. 755 | 756 | chdir "$maildir" ; 757 | 758 | # Converts a Mbox to multiple files 759 | # in a Maildir. 760 | # This is adapted from mbox2maildir. 761 | # 762 | # Open the Mbox mailbox file. 763 | 764 | 765 | if (sysopen(MBOX, "$mbox", O_RDONLY)) 766 | { 767 | #printf("Converting Mbox $mbox . . . \n"); 768 | } 769 | else 770 | { 771 | die("Fatal: unable to open input mailbox file: $mbox ! \n"); 772 | } 773 | 774 | # This loop scans the input mailbox for 775 | # a line starting with "From ". The 776 | # "^" before it is pattern-matching 777 | # lingo for it being at the start of a 778 | # line. 779 | # 780 | # Each email in Mbox mailbox starts 781 | # with such a line, which is why any 782 | # such line in the body of the email 783 | # has to have a ">" put in front of it. 784 | # 785 | # This is not required in a Maildir 786 | # mailbox, and some majik below 787 | # finds any such quoted "> From"s and 788 | # gets rid of the "> " quote. 789 | # 790 | # Each email is put in a file 791 | # in the cur/ subdirectory with a 792 | # name of the form: 793 | # 794 | # nnnnnnnnn.cccc.mbox:2,XXXX 795 | # 796 | # where: 797 | # "nnnnnnnnn" is the Unix time since 798 | # 1970 when this script started 799 | # running, incremented by 1 for 800 | # every email. This is to ensure 801 | # unique names for each message 802 | # file. 803 | # 804 | # ".cccc" is the message count of 805 | # messages from this mbox. 806 | # 807 | # ".mbox" is just to indicate that 808 | # this message was converted from 809 | # an Mbox mailbox. 810 | # 811 | # ":2," is the start of potentially 812 | # multiple IMAP flag characters 813 | # "XXXX", but may be followed by 814 | # nothing. 815 | # 816 | # This is sort-of compliant with 817 | # the Maildir naming conventions 818 | # specified at: 819 | # 820 | # http://www.qmail.org/man/man5/maildir.html 821 | # 822 | # This approach does not involve the 823 | # process ID or the hostname, but it is 824 | # probably good enough. 825 | # 826 | # When the IMAP server looks at this 827 | # mailbox, it will move the files to 828 | # the cur/ directory and change their 829 | # names as it pleases. In the case 830 | # of Courier IMAP, the names will 831 | # become like: 832 | # 833 | # 995096541.25351.mbox:2,S 834 | # 835 | # with 25351 being Courier IMAP's 836 | # process ID. The :2, is the start 837 | # of the flags, and the "S" means 838 | # that this one has been seen by 839 | # the user. (But is this the same 840 | # meaning as the user actually 841 | # having opened the message to see 842 | # its contents, rather than just the 843 | # IMAP server having been asked to 844 | # list the message's Subject etc. 845 | # so the client could list it in the 846 | # visible Inbox?) 847 | # 848 | # This contrasts with a message 849 | # created by Courier IMAP, say with 850 | # a message copy, which is like: 851 | # 852 | # 995096541.25351.zair,S=14285:2,S 853 | # 854 | # where ",S=14285" is the size of the 855 | # message in bytes. 856 | # 857 | # Courier Maildrop's names are similar 858 | # but lack the ":2,XXXX" flags . . . 859 | # except for my modified Maildrop 860 | # which can deliver them with a 861 | # ":2,T" - flagged for deletion. 862 | # 863 | # I have extended the logic of the 864 | # per-message inner loop to stop 865 | # saving a file for a message with: 866 | # 867 | # Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA 868 | # 869 | # This is the dummy message, always 870 | # at the start of an Mbox format 871 | # mailbox file - and is put there 872 | # by UW IMAPD. Since quite a few 873 | # people will use this for 874 | # converting from a UW system, 875 | # I figure it is worth it. 876 | # 877 | # I will not save any such message 878 | # file for the dummy message. 879 | # 880 | # Plan 881 | # ---- 882 | # 883 | # We want to read the entire Mbox file, whilst 884 | # going through a loop for each message we find. 885 | # 886 | # We want to read all the headers of the message, 887 | # starting with the "From " line. For that "From " 888 | # line we want to get a date. 889 | # 890 | # For all other header lines, we want to store them 891 | # in $headers whilst parsing them to find: 892 | # 893 | # 1 - Any flags in the "Status: " or "X-Status: " or 894 | # "X-Mozilla-Status: " lines. 895 | # 896 | # 2 - A subject line indicating this is the dummy message 897 | # at the start (typically, but not necessarily) of 898 | # the Mbox. 899 | # 900 | # Once we reach the end of the headers, we will crunch any 901 | # flags we found to create a file name. Then, unless this is 902 | # the dummy message we create that file and write all the 903 | # headers to it. 904 | # 905 | # Then we continue reading the Mbox, converting ">From " to 906 | # "From " and writing it to the file, until we reach one of: 907 | # 908 | # 1 - Another "From " line (indicating the start of another 909 | # message). 910 | # 911 | # or 912 | # 913 | # 2 - The end of the Mbox. 914 | # 915 | # In the former case, which we detect at the start of the loop 916 | # we need to close the file and touch it to alter its date-time. 917 | # 918 | # In the later case, we also need to close the file and touch 919 | # it to alter its date-time - but this is beyond the end of the 920 | # loop. 921 | 922 | 923 | # Variables 924 | # --------- 925 | 926 | my $messagecount = 0; 927 | 928 | # For generating unique filenames for 929 | # each message. Initialise it here with 930 | # numeric time in seconds since 1970. 931 | my $unique = time; 932 | 933 | # Name of message file to delete if we found that 934 | # it was created by reading the Mbox dummy message. 935 | 936 | my $deletedummy = ''; 937 | 938 | # To store the complete "From (address) (date-time) 939 | # which delineates the start of each message 940 | # in the Mbox 941 | my $fromline = ''; 942 | 943 | 944 | # Set to 1 when we are reading the header lines, 945 | # including the "From " line. 946 | # 947 | # 0 means we are reading the message body and looking 948 | # for another "From " line. 949 | 950 | my $inheaders = 0; 951 | 952 | # Variable to hold all headers (apart from 953 | # the first line "From ...." which is not 954 | # part of the message itself. 955 | my $headers = ''; 956 | 957 | # Variable to hold the accumulated characters 958 | # we find in header lines of the type: 959 | # 960 | # Status: 961 | # X-Status: 962 | # X-Mozilla-Status: 963 | # X-Evolution: 964 | my $flags = ''; 965 | 966 | # To build the file name for the message in. 967 | my $messagefn = ''; 968 | 969 | 970 | # The date string from the "From " line of each 971 | # message will be written here - and used by 972 | # touch to alter the date-time of each message 973 | # file. Put non-date text here to make it 974 | # spit the dummy if my code fails to find a 975 | # date to write into this. 976 | 977 | my $receivedate = 'Bogus'; 978 | 979 | # The subject of the message 980 | my $subject = ''; 981 | 982 | my $previous_line_was_empty = 1; 983 | 984 | # We record the message start line here, for error 985 | # reporting. 986 | my $startline; 987 | 988 | # If defined, we use this as the number of bytes in the 989 | # message body rather than looking for a /^From / line. 990 | my $contentlength; 991 | 992 | # A From lines can either occur as the first 993 | # line of a file, or after an empty line. 994 | # Most mail systems will quote all From lines 995 | # appearing in the message, but some will only 996 | # do it when necessary. 997 | # Since we initialise the variable to true, 998 | # we don't need to check for beginning of file. 999 | 1000 | while() 1001 | { 1002 | # exchange possible Windows EOL (CRLF) with Unix EOL (LF) 1003 | $_ =~ s/\r\n$/\n/; 1004 | 1005 | if ( /^From / 1006 | && $previous_line_was_empty 1007 | && (!defined $contentlength) 1008 | ) 1009 | { 1010 | # We are reading the "From " line which has an 1011 | # email address followed by a receive date. 1012 | # Turn on the $inheaders flag until we reach 1013 | # the end of the headers. 1014 | 1015 | $inheaders = 1; 1016 | 1017 | # record the message start line 1018 | 1019 | $startline = $.; 1020 | 1021 | # If this is not the first run through the loop 1022 | # then this means we have already been working 1023 | # on a message. 1024 | 1025 | if ($messagecount > 0) 1026 | { 1027 | # If so, then close that message file and then 1028 | # use utime to change its date-time. 1029 | # 1030 | # Note this code should be duplicated to do 1031 | # the same thing at the end of the while loop 1032 | # since we must close and touch the final message 1033 | # file we were writing when we hit the end of the 1034 | # Mbox file. 1035 | 1036 | close (OUT); 1037 | if ($messagefn ne '') { 1038 | my $t = str2time($receivedate); 1039 | utime $t, $t, $messagefn; 1040 | } 1041 | } 1042 | 1043 | # Because we opened the Mbox file without any 1044 | # variable, I think this means that we have its 1045 | # current line in Perl's default variable "$_". 1046 | # So all sorts of pattern matching magic works 1047 | # directly on it. 1048 | 1049 | # We are currently reading the first line starting with 1050 | # "From " which contains the date we want. 1051 | # 1052 | # This will be of the form: 1053 | # 1054 | # From dduck@test.org Wed Nov 24 11:05:35 1999 1055 | # 1056 | # at least with UW-IMAP. 1057 | # 1058 | # However, I did find a nasty exception to this in my 1059 | # tests, of the form: 1060 | # 1061 | # "bounce-MusicNewsletter 5-rw=test.org"@announce2.mp3.com 1062 | # 1063 | # This makes it trickier to get rid of the email address, 1064 | # but I did find a way. I can't rule out that there would 1065 | # be some address like this with an "@" in the quoted 1066 | # portion too. 1067 | # 1068 | # Unfortunately, testing with an old Inbox Mbox file, 1069 | # I also found an instance where the email address 1070 | # had no @ sign at all. It was just an email 1071 | # account name, with no host. 1072 | # 1073 | # I could search for the day of the week. If I skipped 1074 | # at least one word of non-whitespace (1 or more contiguous 1075 | # non-whitespace characters) then searched for a day of 1076 | # the week, then I should be able to avoid almost 1077 | # every instance of a day of the week appearing in 1078 | # the email address. 1079 | # 1080 | # Do I need a failsafe arrangement to provide some 1081 | # other date to touch if I don't get what seems like 1082 | # a date in my resulting string? For now, no. 1083 | # 1084 | # I will take one approach if there is an @ in the 1085 | # "From " line and another (just skip the first word 1086 | # after "From ") if there is no @ in the line. 1087 | # 1088 | # If I knew more about Perl I would probably do it in 1089 | # a more elegant way. 1090 | 1091 | # Copy the current line into $fromline. 1092 | 1093 | $fromline = $_; 1094 | 1095 | # Now get rid of the "From ". " =~ s" means substitute. 1096 | # Find the word "From " at the start of the line and 1097 | # replace it with nothing. The nothing is what is 1098 | # between the second and third slash. 1099 | 1100 | $fromline =~ s/^From // ; 1101 | 1102 | 1103 | # Likewise get rid of the email address. 1104 | # This first section is if we determine there is one 1105 | # (or more . . . ) "@" characters in the line, which 1106 | # would normally be the case. 1107 | 1108 | if ($fromline =~ m/@/) 1109 | { 1110 | # The line has at least one "@" in it, so we assume 1111 | # this is in the middle of an email address. 1112 | # 1113 | # If the email address had no spaces, then we could 1114 | # get rid of the whole thing by searching for any number 1115 | # of non-whitespace characters (\S) contiguously, and 1116 | # then I think a space. Subsitute nothing for this. 1117 | # 1118 | # $fromline =~ s/(\S)+ // ; 1119 | # 1120 | # But we need something to match any number of non-@ 1121 | # characters, then the "@" and then all the non-whitespace 1122 | # characters from there (which takes us to the end of 1123 | # "test.org") and then the space following that. 1124 | # 1125 | # A tutorial on regular expressions is: 1126 | # 1127 | # http://www.perldoc.com/perl5.6.1/pod/perlretut.html 1128 | # 1129 | # Get rid of all non-@ characters up to the first "@": 1130 | 1131 | $fromline =~ s/[^@]+//; 1132 | 1133 | 1134 | # Get rid of the "@". 1135 | 1136 | $fromline =~ s/@//; 1137 | } 1138 | # If there was an "@" in the line, then we have now 1139 | # removed the first one (lets hope there aren't more!) 1140 | # and everything which preceded it. 1141 | # 1142 | # we now remove either something like 1143 | # '(foo bar)'. eg. '(no mail address)', 1144 | # or everything after the '@' up to the trailing 1145 | # timezone 1146 | # 1147 | # FIXME: all those regexp should be combined to just one single one 1148 | 1149 | $fromline =~ s/(\((\S*| )+\)|\S+) *//; 1150 | 1151 | chomp $fromline; 1152 | 1153 | # Stash the date-time for later use. We will use it 1154 | # to touch the file after we have closed it. 1155 | 1156 | $receivedate = $fromline; 1157 | 1158 | # Debugging lines: 1159 | # 1160 | # print "$receivedate is the receivedate of message $messagecount.\n"; 1161 | # $receivedate = "Wed Nov 24 11:05:35 1999"; 1162 | # 1163 | # To look at the exact date-time of files: 1164 | # 1165 | # ls -lFa --full-time 1166 | # 1167 | # End of handling the "From " line. 1168 | } 1169 | 1170 | 1171 | # Now process header lines which are not the "From " line. 1172 | 1173 | if ( ($inheaders eq 1) 1174 | && (! /^From /) 1175 | ) 1176 | { 1177 | # Now we are reading the header lines after the "From " line. 1178 | # Keep looking for the blank line which indicates the end of the 1179 | # headers. 1180 | 1181 | 1182 | # ".=" means append the current line to the $headers 1183 | # variable. 1184 | # 1185 | # For some reason, I was getting two blank lines 1186 | # at the end of the headers, rather than one, 1187 | # so I decided not to read in the blank line 1188 | # which terminates the headers. 1189 | # 1190 | # Delete the "unless ($_ eq "\n")" to get rid 1191 | # of this kludge. 1192 | 1193 | $headers .= $_ unless ($_ eq "\n"); 1194 | 1195 | # Now scan the line for various status flags 1196 | # and to fine the Subject line. 1197 | 1198 | $flags .= $1 if /^Status: ([A-Z]+)/; 1199 | $flags .= $1 if /^X-Status: ([A-Z]+)/; 1200 | if (/^X-Mozilla-Status: ([0-9a-f]{4})/i) 1201 | { 1202 | $flags .= 'R' if (hex($1) & 0x0001); 1203 | $flags .= 'A' if (hex($1) & 0x0002); 1204 | $flags .= 'D' if (hex($1) & 0x0008); 1205 | } 1206 | if(/^X\-Evolution:\s+\w{8}\-(\w{4})/oi) 1207 | { 1208 | $b = pack("H4", $1); #pack it as 4 digit hex (0x0000) 1209 | $b = unpack("B32", $b); #unpack into bit string 1210 | 1211 | # "usually" only the right most six bits are used 1212 | # however, I have come across a seventh bit in 1213 | # about 15 (out of 10,000) messages with this bit 1214 | # activated. 1215 | # I have not found any documentation in the source. 1216 | # If you find out what it does, please let me know. 1217 | 1218 | # Notes: 1219 | # Evolution 1.4 does mark forwarded messages. 1220 | # The sixth bit is to denote an attachment 1221 | 1222 | $flags .= 'A' if($b =~ /[01]{15}1/); #replied 1223 | $flags .= 'D' if($b =~ /[01]{14}1[01]{1}/); #deleted 1224 | $flags .= 'T' if($b =~ /[01]{13}1[01]{2}/); #draft 1225 | $flags .= 'F' if($b =~ /[01]{12}1[01]{3}/); #flagged 1226 | $flags .= 'R' if($b =~ /[01]{11}1[01]{4}/); #seen/read 1227 | } 1228 | $subject = $1 if /^Subject: (.*)$/; 1229 | if ($use_cl eq 1) 1230 | { 1231 | $contentlength = $1 if /^Content-Length: (\d+)$/; 1232 | } 1233 | 1234 | # Now look out for the end of the headers - a blank 1235 | # line. When we find it, create the file name and 1236 | # analyse the Subject line. 1237 | 1238 | if ($_ eq "\n") 1239 | { 1240 | # We are at the end of the headers. Set the 1241 | # $inheaders flag back to 0. 1242 | 1243 | $inheaders = 0; 1244 | 1245 | # Include the current newline in the content length 1246 | 1247 | ++$contentlength if defined $contentlength; 1248 | 1249 | # Create the file name for the current message. 1250 | # 1251 | # A simple version of this would be: 1252 | # 1253 | # $messagefn = "cur/$unique.$messagecount.mbox:2,"; 1254 | # 1255 | # This would create names with $messagecount values of 1256 | # 1, 2, etc. But for neatness when looking at a 1257 | # directory of such messages, sorted by filename, 1258 | # I want to have leading zeroes on message count, so 1259 | # that they would be 000001 etc. This makes them 1260 | # appear in message order rather than 1 being after 1261 | # 19 etc. So this is good for up to 999,999 messages 1262 | # in a mailbox. It is a cosmetic matter for a person 1263 | # looking into the Maildir directory manually. 1264 | # To do this, use sprintf instead with "%06d" for 1265 | # 6 characters of zero-padding: 1266 | 1267 | $messagefn = sprintf ("cur/%d.%06d.mbox:2,", $unique, $messagecount) ; 1268 | 1269 | 1270 | # Append flag characters to the end of the 1271 | # filename, according to flag characters 1272 | # collected from the message headers 1273 | 1274 | $messagefn .= 'F' if $flags =~ /F/; # Flagged. 1275 | $messagefn .= 'R' if $flags =~ /A/; # Replied to. 1276 | $messagefn .= 'S' if $flags =~ /R/; # Seen or Read. 1277 | $messagefn .= 'T' if $flags =~ /D/; # Tagged for deletion. 1278 | 1279 | 1280 | # Opens filename $messagefn for output (>) with filehandle OUT. 1281 | 1282 | open(OUT, ">$messagefn") or die("Fatal: unable to create new message $messagefn"); 1283 | 1284 | # Count the messages. 1285 | 1286 | $messagecount++; 1287 | 1288 | # Only for the first message, 1289 | # check to see if it is a dummy. 1290 | # Delete the message file we 1291 | # just created if it was for the 1292 | # dummy message at the start 1293 | # of the Mbox. 1294 | # 1295 | # Add search terms as required. 1296 | # The last 2 lines are for rent. 1297 | # 1298 | # "m" means match the regular expression, 1299 | # but we can do without it. 1300 | # 1301 | # Do I need to escape the ' in "DON'T"? 1302 | # I didn't in the original version. 1303 | 1304 | if ( (($messagecount == 1) && defined($subject)) 1305 | && ($subject =~ m/^DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA/) 1306 | ) 1307 | { 1308 | # Stash the file name of the dummy message so we 1309 | # can delete it later. 1310 | 1311 | $deletedummy = "$messagefn"; 1312 | } 1313 | 1314 | # Print the collected headers to the message file. 1315 | 1316 | print OUT "$headers"; 1317 | 1318 | 1319 | # Clear $headers and $flags ready for the next message. 1320 | 1321 | $headers = ''; 1322 | $flags = ''; 1323 | 1324 | # End of processing the headers once we found the 1325 | # blank line which terminated them 1326 | } 1327 | 1328 | # End of dealing with the headers. 1329 | } 1330 | 1331 | 1332 | if ( $inheaders eq 0) 1333 | { 1334 | 1335 | # We are now processing the message body. 1336 | # 1337 | # Now we have passed the headers to the 1338 | # output file, we scan until the while 1339 | # loop finds another "From " line. 1340 | 1341 | # Decrement our content length if we're 1342 | # using it to find the end of the message 1343 | # body 1344 | 1345 | if (defined $contentlength) { 1346 | 1347 | # Decrement our $contentlength variable 1348 | 1349 | $contentlength -= length($_); 1350 | 1351 | # The proper end for a message with Content-Length 1352 | # specified is the $contentlength variable should 1353 | # be exactly -1 and we should be on a bare 1354 | # newline. Note that the bare newline is not 1355 | # printed to the end of the current message as 1356 | # it's actually a message separator in the mbox 1357 | # format rather than part of the message. The 1358 | # next line _should_ be a From_ line, but just in 1359 | # case the Content-Length header is incorrect 1360 | # (e.g. a corrupt mailbox), we just continue 1361 | # putting lines into the current message until we 1362 | # see the next From_ line. 1363 | 1364 | if ($contentlength < 0) { 1365 | if ($contentlength == -1 && $_ eq "\n") { 1366 | $contentlength = undef; 1367 | next; 1368 | } 1369 | $contentlength = undef; 1370 | } 1371 | } 1372 | 1373 | # 1374 | # We want to copy every part of the message 1375 | # body to the output file, except for the 1376 | # quoted ">From " lines, which was the 1377 | # way the IMAP server encoded body lines 1378 | # starting with "From ". 1379 | # 1380 | # Pattern matching Perl majik to 1381 | # get rid of an Mbox quoted From. 1382 | # 1383 | # This works on the default variable "$_" which 1384 | # contains the text from the Mbox mailbox - I 1385 | # guess this is the case because of our 1386 | # (open(MBOX ....) line above, which did not 1387 | # assign this to anything else, so it would go 1388 | # to the default variable. This enables 1389 | # inscrutably terse Perlisms to follow. 1390 | # 1391 | # "s" means "Subsitute" and it looks for any 1392 | # occurrence of ">From" starting at the start 1393 | # of the line. When it finds this, it replaces 1394 | # it with "From". 1395 | # 1396 | # So this finds all instances in the Mbox message 1397 | # where the original line started with the word 1398 | # "From" but was converted to ">From" in order to 1399 | # not be mistaken for the "From ..." line which 1400 | # is used to demark each message in the Mbox. 1401 | # This was was a destructive conversion because 1402 | # any message which originally had ">From" at the 1403 | # start of the line, before being put into the 1404 | # Mbox, will now have that line without the ">". 1405 | 1406 | s/^>From /From /; 1407 | 1408 | # Glorious tersness here. Thanks Simon for 1409 | # explaining this. 1410 | # 1411 | # "print OUT" means print the default variable to 1412 | # the file of file handle OUT. This is where 1413 | # the bulk of the message text is written to 1414 | # the output file. 1415 | 1416 | print OUT or die("Fatal: unable to write to new message to $messagefn"); 1417 | 1418 | 1419 | # End of the if statement dealing with message body. 1420 | } 1421 | 1422 | $previous_line_was_empty = ( $_ eq "\n" ); 1423 | 1424 | # End of while (MBOX) loop. 1425 | } 1426 | # Close the input file. 1427 | 1428 | close(MBOX); 1429 | 1430 | # Close the output file, and duplicate the code 1431 | # from the start of the while loop which touches 1432 | # the date-time of the most recent message file. 1433 | 1434 | close(OUT); 1435 | if ($messagefn ne '') { 1436 | my $t = str2time($receivedate); 1437 | utime $t, $t, $messagefn; 1438 | } 1439 | 1440 | # After all the messages have been 1441 | # converted, check to see if the 1442 | # first one was a dummy. 1443 | # if so, delete it and make 1444 | # the message count one less. 1445 | 1446 | if ($deletedummy ne "") 1447 | { 1448 | printf("Dummy mail system first message detected and not saved.\n"); 1449 | unlink $deletedummy; 1450 | 1451 | $messagecount--; 1452 | 1453 | } 1454 | 1455 | printf("$messagecount messages.\n\n"); 1456 | } 1457 | --------------------------------------------------------------------------------
  13 |
16 |
From
17 | WASHINGTON, DC 18 | 19 | 20 |
✈ 21 | 22 |
Destination
23 | SFO 24 | 25 | 26 |
30 |
Depart
31 | 08:05 32 |
33 |
Date
34 | 2014-09-04 35 |
39 |
Arriving
40 | 2014-09-04 10:38 41 |
Flight number
42 | Flight number UA1704 with 43 |
Ticket
44 | 45 |
46 |
47 |
  57 |
60 |
From
61 | SFO 62 | 63 | 64 |
✈ 65 | 66 |
Destination
67 | WASHINGTON, DC 68 | 69 | 70 |
74 |
Depart
75 | 10:05 76 |
77 |
Date
78 | 2014-09-07 79 |
83 |
Arriving
84 | 2014-09-07 18:18 85 |
Flight number
86 | Flight number UA1144 with 87 |
Ticket
88 | 89 |
90 |
91 |
  101 |
104 |
From
105 | SFO 106 | 107 | 108 |
✈ 109 | 110 |
Destination
111 | WASHINGTON, DC 112 | 113 | 114 |
118 |
Depart
119 | 10:05 120 |
121 |
Date
122 | 2014-09-07 123 |
127 |
Arriving
128 | 2014-09-07 18:18 129 |
Flight number
130 | Flight number UA1144 with 131 |
Ticket
132 | 133 |
134 |
135 |
  145 |
148 |
From
149 | WASHINGTON, DC 150 | 151 | 152 |
✈ 153 | 154 |
Destination
155 | MUC 156 | 157 | 158 |
162 |
Depart
163 | 17:20 164 |
165 |
Date
166 | 2014-09-18 167 |
171 |
Arriving
172 | 2014-09-19 07:50 173 |
Flight number
174 | Flight number UA132 with 175 |
Ticket
176 | 177 |
178 |
179 |
  189 |
192 |
From
193 | MUC 194 | 195 | 196 |
✈ 197 | 198 |
Destination
199 | WAW 200 | 201 | 202 |
206 |
Depart
207 | 14:40 208 |
209 |
Date
210 | 2014-09-19 211 |
215 |
Arriving
216 | 2014-09-19 16:10 217 |
Flight number
218 | Flight number LH1614 with 219 |
Ticket
220 | 221 |
222 |
223 |
  233 |
236 |
From
237 | WAW 238 | 239 | 240 |
✈ 241 | 242 |
Destination
243 | FRA 244 | 245 | 246 |
250 |
Depart
251 | 10:10 252 |
253 |
Date
254 | 2014-10-02 255 |
259 |
Arriving
260 | 2014-10-02 12:00 261 |
Flight number
262 | Flight number LH1347 with 263 |
Ticket
264 | 265 |
266 |
267 |
  277 |
280 |
From
281 | FRA 282 | 283 | 284 |
✈ 285 | 286 |
Destination
287 | WASHINGTON, DC 288 | 289 | 290 |
294 |
Depart
295 | 17:00 296 |
297 |
Date
298 | 2014-10-02 299 |
303 |
Arriving
304 | 2014-10-02 20:00 305 |
Flight number
306 | Flight number UA933 with 307 |
Ticket
308 | 309 |
310 |
311 |
50 days later 321 |
324 |
From
325 | SEA 326 | 327 | 328 |
✈ 329 | 330 |
Destination
331 | BWI 332 | 333 | 334 |
338 |
Depart
339 | 08:15 340 |
341 |
Date
342 | 2014-11-21 343 |
347 |
Arriving
348 | 2014-11-21 16:10 349 |
Flight number
350 | Flight number Alaska 766Boeing 737-800 with 351 |
Ticket
352 | Ticket#XYWHNK 353 |
354 |
355 |
  365 |
368 |
From
369 | BWI 370 | 371 | 372 |
✈ 373 | 374 |
Destination
375 | SEA 376 | 377 | 378 |
382 |
Depart
383 | 17:10 384 |
385 |
Date
386 | 2014-11-21 387 |
391 |
Arriving
392 | 2014-11-21 20:05 393 |
Flight number
394 | Flight number Alaska 767Boeing 737-800 with 395 |
Ticket
396 | Ticket#XYWHNK 397 |
398 |
399 |
144 days later 409 |
412 |
From
413 | WASHINGTON, DC 414 | 415 | 416 |
✈ 417 | 418 |
Destination
419 | SFO 420 | 421 | 422 |
426 |
Depart
427 | 06:00 428 |
429 |
Date
430 | 2015-04-14 431 |
435 |
Arriving
436 | 2015-04-14 08:56 437 |
Flight number
438 | Flight number UA567 with 439 |
Ticket
440 | 441 |
442 |
443 |
  453 |
456 |
From
457 | WASHINGTON, DC 458 | 459 | 460 |
✈ 461 | 462 |
Destination
463 | SFO 464 | 465 | 466 |
470 |
Depart
471 | 06:00 472 |
473 |
Date
474 | 2015-04-14 475 |
479 |
Arriving
480 | 2015-04-14 08:56 481 |
Flight number
482 | Flight number UA567 with 483 |
Ticket
484 | Ticket#G1ZCWK 485 |
486 |
487 |
  497 |
500 |
From
501 | WASHINGTON, DC 502 | 503 | 504 |
✈ 505 | 506 |
Destination
507 | SFO 508 | 509 | 510 |
514 |
Depart
515 | 09:25 516 |
517 |
Date
518 | 2015-04-14 519 |
523 |
Arriving
524 | 2015-04-14 12:22 525 |
Flight number
526 | Flight number UA782 with 527 |
Ticket
528 | Ticket#G1ZCWK 529 |
530 |
531 |
  541 |
544 |
From
545 | SFO 546 | 547 | 548 |
✈ 549 | 550 |
Destination
551 | WASHINGTON, DC 552 | 553 | 554 |
558 |
Depart
559 | 23:10 560 |
561 |
Date
562 | 2015-04-14 563 |
567 |
Arriving
568 | 2015-04-15 07:22 569 |
Flight number
570 | Flight number UA355 with 571 |
Ticket
572 | 573 |
574 |
575 |
  585 |
588 |
From
589 | SFO 590 | 591 | 592 |
✈ 593 | 594 |
Destination
595 | WASHINGTON, DC 596 | 597 | 598 |
602 |
Depart
603 | 23:10 604 |
605 |
Date
606 | 2015-04-14 607 |
611 |
Arriving
612 | 2015-04-15 07:22 613 |
Flight number
614 | Flight number UA355 with 615 |
Ticket
616 | Ticket#G11F6Y 617 |
618 |
619 |
  629 |
632 |
From
633 | SFO 634 | 635 | 636 |
✈ 637 | 638 |
Destination
639 | WASHINGTON, DC 640 | 641 | 642 |
646 |
Depart
647 | 23:10 648 |
649 |
Date
650 | 2015-04-15 651 |
655 |
Arriving
656 | 2015-04-16 07:22 657 |
Flight number
658 | Flight number UA355 with 659 |
Ticket
660 | 661 |
662 |
663 |
105 days later 673 |
676 |
From
677 | NEWARK, NJ 678 | 679 | 680 |
✈ 681 | 682 |
Destination
683 | DEN 684 | 685 | 686 |
690 |
Depart
691 | 06:50 692 |
693 |
Date
694 | 2015-07-29 695 |
699 |
Arriving
700 | 2015-07-29 09:03 701 |
Flight number
702 | Flight number UA1211 with 703 |
Ticket
704 | 705 |
706 |
707 |
  717 |
720 |
From
721 | NEWARK, NJ 722 | 723 | 724 |
✈ 725 | 726 |
Destination
727 | DEN 728 | 729 | 730 |
734 |
Depart
735 | 06:50 736 |
737 |
Date
738 | 2015-07-29 739 |
743 |
Arriving
744 | 2015-07-29 09:03 745 |
Flight number
746 | Flight number UA1211 with 747 |
Ticket
748 | Ticket#ESX745 749 |
750 |
751 |
  761 |
764 |
From
765 | DEN 766 | 767 | 768 |
✈ 769 | 770 |
Destination
771 | RNO 772 | 773 | 774 |
778 |
Depart
779 | 11:17 780 |
781 |
Date
782 | 2015-07-29 783 |
787 |
Arriving
788 | 2015-07-29 12:37 789 |
Flight number
790 | Flight number UA295 with 791 |
Ticket
792 | 793 |
794 |
795 |
  805 |
808 |
From
809 | DEN 810 | 811 | 812 |
✈ 813 | 814 |
Destination
815 | RNO 816 | 817 | 818 |
822 |
Depart
823 | 11:17 824 |
825 |
Date
826 | 2015-07-29 827 |
831 |
Arriving
832 | 2015-07-29 12:37 833 |
Flight number
834 | Flight number UA295 with 835 |
Ticket
836 | Ticket#ESX745 837 |
838 |
839 |
  849 |
852 |
From
853 | RNO 854 | 855 | 856 |
✈ 857 | 858 |
Destination
859 | SFO 860 | 861 | 862 |
866 |
Depart
867 | 13:26 868 |
869 |
Date
870 | 2015-08-04 871 |
875 |
Arriving
876 | 2015-08-04 14:45 877 |
Flight number
878 | Flight number UA295 with 879 |
Ticket
880 | 881 |
882 |
883 |
  893 |
896 |
From
897 | RNO 898 | 899 | 900 |
✈ 901 | 902 |
Destination
903 | SFO 904 | 905 | 906 |
910 |
Depart
911 | 13:26 912 |
913 |
Date
914 | 2015-08-04 915 |
919 |
Arriving
920 | 2015-08-04 14:45 921 |
Flight number
922 | Flight number UA295 with 923 |
Ticket
924 | Ticket#ESX745 925 |
926 |
927 |
  937 |
940 |
From
941 | SFO 942 | 943 | 944 |
✈ 945 | 946 |
Destination
947 | NEWARK, NJ 948 | 949 | 950 |
954 |
Depart
955 | 16:02 956 |
957 |
Date
958 | 2015-08-04 959 |
963 |
Arriving
964 | 2015-08-05 00:32 965 |
Flight number
966 | Flight number UA1674 with 967 |
Ticket
968 | 969 |
970 |
971 |
  981 |
984 |
From
985 | SFO 986 | 987 | 988 |
✈ 989 | 990 |
Destination
991 | NEWARK, NJ 992 | 993 | 994 |
998 |
Depart
999 | 16:02 1000 |
1001 |
Date
1002 | 2015-08-04 1003 |
1007 |
Arriving
1008 | 2015-08-05 00:32 1009 |
Flight number
1010 | Flight number UA1674 with 1011 |
Ticket
1012 | Ticket#ESX745 1013 |
1014 |
1015 |
16 days later 1025 |
1028 |
From
1029 | WASHINGTON, DC 1030 | 1031 | 1032 |
✈ 1033 | 1034 |
Destination
1035 | SFO 1036 | 1037 | 1038 |
1042 |
Depart
1043 | 06:47 1044 |
1045 |
Date
1046 | 2015-08-20 1047 |
1051 |
Arriving
1052 | 2015-08-20 09:40 1053 |
Flight number
1054 | Flight number UA434 with 1055 |
Ticket
1056 | Ticket#HVZT0F 1057 |
1058 |
1059 |
  1069 |
1072 |
From
1073 | SFO 1074 | 1075 | 1076 |
✈ 1077 | 1078 |
Destination
1079 | RNO 1080 | 1081 | 1082 |
1086 |
Depart
1087 | 11:41 1088 |
1089 |
Date
1090 | 2015-08-20 1091 |
1095 |
Arriving
1096 | 2015-08-20 12:49 1097 |
Flight number
1098 | Flight number UA677 with 1099 |
Ticket
1100 | Ticket#HVZT0F 1101 |
1102 |
1103 |
  1113 |
1116 |
From
1117 | SFO 1118 | 1119 | 1120 |
✈ 1121 | 1122 |
Destination
1123 | WASHINGTON, DC 1124 | 1125 | 1126 |
1130 |
Depart
1131 | 10:50 1132 |
1133 |
Date
1134 | 2015-08-25 1135 |
1139 |
Arriving
1140 | 2015-08-25 19:04 1141 |
Flight number
1142 | Flight number UA1993 with 1143 |
Ticket
1144 | Ticket#HVZT0F 1145 |
1146 |
1147 |
90 days later 1157 |
1160 |
From
1161 | WASHINGTON, DC 1162 | 1163 | 1164 |
✈ 1165 | 1166 |
Destination
1167 | HOUSTON, TX 1168 | 1169 | 1170 |
1174 |
Depart
1175 | 08:00 1176 |
1177 |
Date
1178 | 2015-11-23 1179 |
1183 |
Arriving
1184 | 2015-11-23 10:05 1185 |
Flight number
1186 | Flight number UA1558 with 1187 |
Ticket
1188 | 1189 |
1190 |
1191 |
  1201 |
1204 |
From
1205 | WASHINGTON, DC 1206 | 1207 | 1208 |
✈ 1209 | 1210 |
Destination
1211 | HOUSTON, TX 1212 | 1213 | 1214 |
1218 |
Depart
1219 | 08:00 1220 |
1221 |
Date
1222 | 2015-11-23 1223 |
1227 |
Arriving
1228 | 2015-11-23 10:05 1229 |
Flight number
1230 | Flight number UA1558 with 1231 |
Ticket
1232 | 1233 |
1234 |
1235 |
  1245 |
1248 |
From
1249 | WASHINGTON, DC 1250 | 1251 | 1252 |
✈ 1253 | 1254 |
Destination
1255 | HOUSTON, TX 1256 | 1257 | 1258 |
1262 |
Depart
1263 | 08:00 1264 |
1265 |
Date
1266 | 2015-11-23 1267 |
1271 |
Arriving
1272 | 2015-11-23 10:05 1273 |
Flight number
1274 | Flight number UA1558 with 1275 |
Ticket
1276 | 1277 |
1278 |
1279 |
  1289 |
1292 |
From
1293 | WASHINGTON, DC 1294 | 1295 | 1296 |
✈ 1297 | 1298 |
Destination
1299 | HOUSTON, TX 1300 | 1301 | 1302 |
1306 |
Depart
1307 | 08:00 1308 |
1309 |
Date
1310 | 2015-11-23 1311 |
1315 |
Arriving
1316 | 2015-11-23 10:05 1317 |
Flight number
1318 | Flight number UA1558 with 1319 |
Ticket
1320 | 1321 |
1322 |
1323 |
  1333 |
1336 |
From
1337 | HOUSTON, TX 1338 | 1339 | 1340 |
✈ 1341 | 1342 |
Destination
1343 | SJD 1344 | 1345 | 1346 |
1350 |
Depart
1351 | 11:25 1352 |
1353 |
Date
1354 | 2015-11-23 1355 |
1359 |
Arriving
1360 | 2015-11-23 13:26 1361 |
Flight number
1362 | Flight number UA1452 with 1363 |
Ticket
1364 | 1365 |
1366 |
1367 |
  1377 |
1380 |
From
1381 | HOUSTON, TX 1382 | 1383 | 1384 |
✈ 1385 | 1386 |
Destination
1387 | SJD 1388 | 1389 | 1390 |
1394 |
Depart
1395 | 11:25 1396 |
1397 |
Date
1398 | 2015-11-23 1399 |
1403 |
Arriving
1404 | 2015-11-23 13:26 1405 |
Flight number
1406 | Flight number UA1452 with 1407 |
Ticket
1408 | 1409 |
1410 |
1411 |
  1421 |
1424 |
From
1425 | HOUSTON, TX 1426 | 1427 | 1428 |
✈ 1429 | 1430 |
Destination
1431 | SJD 1432 | 1433 | 1434 |
1438 |
Depart
1439 | 11:25 1440 |
1441 |
Date
1442 | 2015-11-23 1443 |
1447 |
Arriving
1448 | 2015-11-23 13:26 1449 |
Flight number
1450 | Flight number UA1452 with 1451 |
Ticket
1452 | 1453 |
1454 |
1455 |
  1465 |
1468 |
From
1469 | HOUSTON, TX 1470 | 1471 | 1472 |
✈ 1473 | 1474 |
Destination
1475 | SJD 1476 | 1477 | 1478 |
1482 |
Depart
1483 | 11:25 1484 |
1485 |
Date
1486 | 2015-11-23 1487 |
1491 |
Arriving
1492 | 2015-11-23 13:26 1493 |
Flight number
1494 | Flight number UA1452 with 1495 |
Ticket
1496 | 1497 |
1498 |
1499 |
  1509 |
1512 |
From
1513 | SJD 1514 | 1515 | 1516 |
✈ 1517 | 1518 |
Destination
1519 | HOUSTON, TX 1520 | 1521 | 1522 |
1526 |
Depart
1527 | 14:16 1528 |
1529 |
Date
1530 | 2015-11-28 1531 |
1535 |
Arriving
1536 | 2015-11-28 17:50 1537 |
Flight number
1538 | Flight number UA1453 with 1539 |
Ticket
1540 | 1541 |
1542 |
1543 |
  1553 |
1556 |
From
1557 | SJD 1558 | 1559 | 1560 |
✈ 1561 | 1562 |
Destination
1563 | HOUSTON, TX 1564 | 1565 | 1566 |
1570 |
Depart
1571 | 14:16 1572 |
1573 |
Date
1574 | 2015-11-28 1575 |
1579 |
Arriving
1580 | 2015-11-28 17:50 1581 |
Flight number
1582 | Flight number UA1453 with 1583 |
Ticket
1584 | 1585 |
1586 |
1587 |
  1597 |
1600 |
From
1601 | SJD 1602 | 1603 | 1604 |
✈ 1605 | 1606 |
Destination
1607 | HOUSTON, TX 1608 | 1609 | 1610 |
1614 |
Depart
1615 | 14:16 1616 |
1617 |
Date
1618 | 2015-11-28 1619 |
1623 |
Arriving
1624 | 2015-11-28 17:50 1625 |
Flight number
1626 | Flight number UA1453 with 1627 |
Ticket
1628 | 1629 |
1630 |
1631 |
  1641 |
1644 |
From
1645 | SJD 1646 | 1647 | 1648 |
✈ 1649 | 1650 |
Destination
1651 | HOUSTON, TX 1652 | 1653 | 1654 |
1658 |
Depart
1659 | 14:16 1660 |
1661 |
Date
1662 | 2015-11-28 1663 |
1667 |
Arriving
1668 | 2015-11-28 17:50 1669 |
Flight number
1670 | Flight number UA1453 with 1671 |
Ticket
1672 | 1673 |
1674 |
1675 |
  1685 |
1688 |
From
1689 | SJD 1690 | 1691 | 1692 |
✈ 1693 | 1694 |
Destination
1695 | HOUSTON, TX 1696 | 1697 | 1698 |
1702 |
Depart
1703 | 14:16 1704 |
1705 |
Date
1706 | 2015-11-28 1707 |
1711 |
Arriving
1712 | 2015-11-28 17:50 1713 |
Flight number
1714 | Flight number UA1453 with 1715 |
Ticket
1716 | 1717 |
1718 |
1719 |
  1729 |
1732 |
From
1733 | SJD 1734 | 1735 | 1736 |
✈ 1737 | 1738 |
Destination
1739 | HOUSTON, TX 1740 | 1741 | 1742 |
1746 |
Depart
1747 | 14:16 1748 |
1749 |
Date
1750 | 2015-11-28 1751 |
1755 |
Arriving
1756 | 2015-11-28 17:50 1757 |
Flight number
1758 | Flight number UA1453 with 1759 |
Ticket
1760 | 1761 |
1762 |
1763 |
  1773 |
1776 |
From
1777 | SJD 1778 | 1779 | 1780 |
✈ 1781 | 1782 |
Destination
1783 | HOUSTON, TX 1784 | 1785 | 1786 |
1790 |
Depart
1791 | 14:16 1792 |
1793 |
Date
1794 | 2015-11-28 1795 |
1799 |
Arriving
1800 | 2015-11-28 17:50 1801 |
Flight number
1802 | Flight number UA1453 with 1803 |
Ticket
1804 | 1805 |
1806 |
1807 |
  1817 |
1820 |
From
1821 | SJD 1822 | 1823 | 1824 |
✈ 1825 | 1826 |
Destination
1827 | HOUSTON, TX 1828 | 1829 | 1830 |
1834 |
Depart
1835 | 14:16 1836 |
1837 |
Date
1838 | 2015-11-28 1839 |
1843 |
Arriving
1844 | 2015-11-28 17:50 1845 |
Flight number
1846 | Flight number UA1453 with 1847 |
Ticket
1848 | 1849 |
1850 |
1851 |
  1861 |
1864 |
From
1865 | HOUSTON, TX 1866 | 1867 | 1868 |
✈ 1869 | 1870 |
Destination
1871 | WASHINGTON, DC 1872 | 1873 | 1874 |
1878 |
Depart
1879 | 19:10 1880 |
1881 |
Date
1882 | 2015-11-28 1883 |
1887 |
Arriving
1888 | 2015-11-28 23:05 1889 |
Flight number
1890 | Flight number UA1955 with 1891 |
Ticket
1892 | 1893 |
1894 |
1895 |
  1905 |
1908 |
From
1909 | HOUSTON, TX 1910 | 1911 | 1912 |
✈ 1913 | 1914 |
Destination
1915 | WASHINGTON, DC 1916 | 1917 | 1918 |
1922 |
Depart
1923 | 19:10 1924 |
1925 |
Date
1926 | 2015-11-28 1927 |
1931 |
Arriving
1932 | 2015-11-28 23:05 1933 |
Flight number
1934 | Flight number UA1955 with 1935 |
Ticket
1936 | 1937 |
1938 |
1939 |
  1949 |
1952 |
From
1953 | HOUSTON, TX 1954 | 1955 | 1956 |
✈ 1957 | 1958 |
Destination
1959 | WASHINGTON, DC 1960 | 1961 | 1962 |
1966 |
Depart
1967 | 19:10 1968 |
1969 |
Date
1970 | 2015-11-28 1971 |
1975 |
Arriving
1976 | 2015-11-28 23:05 1977 |
Flight number
1978 | Flight number UA1955 with 1979 |
Ticket
1980 | 1981 |
1982 |
1983 |
  1993 |
1996 |
From
1997 | HOUSTON, TX 1998 | 1999 | 2000 |
✈ 2001 | 2002 |
Destination
2003 | WASHINGTON, DC 2004 | 2005 | 2006 |
2010 |
Depart
2011 | 19:10 2012 |
2013 |
Date
2014 | 2015-11-28 2015 |
2019 |
Arriving
2020 | 2015-11-28 23:05 2021 |
Flight number
2022 | Flight number UA1955 with 2023 |
Ticket
2024 | 2025 |
2026 |
2027 |