# Helper methods of ``BaseFolder`` (offlineimap/folder/Base.py).  They are
# written flush-left here; indent one level when placed in the class body.

def _extract_message_id(self, raw_msg_bytes):
    """Extract the Message-ID from a bytes object containing a raw message.

    This function attempts to find the Message-ID for a message that has not
    been processed by the built-in email library, and is therefore NOT an
    email object.  If parsing the message fails (or is otherwise not
    needed), this utility can be useful to help provide a (hopefully) unique
    identifier in log messages to facilitate locating the message on disk.

    :param raw_msg_bytes: bytes object containing the raw email message.
    :returns: A tuple ``(msg_id, is_rfc)``.  ``msg_id`` is the contents of
              the Message-ID header as bytes (or an empty bytes object if
              no such header exists).  ``is_rfc`` is True if the value was
              in proper RFC format, or False if it contained defects or
              was not found.
    """
    # Only the header block (everything before the first blank line) is
    # inspected.  maxsplit=1 avoids needlessly splitting the message body.
    msg_header = re.split(rb'\r?\n\r?\n', raw_msg_bytes, maxsplit=1)[0]
    flags = re.IGNORECASE | re.MULTILINE

    # Strict pass: an angle-bracketed id made of RFC "atext" characters.
    # '^' with MULTILINE also matches a Message-ID on the very first header
    # line.  Whitespace after the colon is optional per RFC 5322, and '-' is
    # escaped so that the class does not accidentally form a range.
    strict = re.search(
        rb"^message-id:\s*(<[A-Za-z0-9!#$%&'*+\-/=?^_`{}|~.@ ]+>)",
        msg_header, flags)
    if strict is not None:
        return (strict.group(1), True)

    # Lenient pass: the header exists but its value is malformed.  Return
    # whatever is on that line (case-insensitively, and correctly bounded by
    # the end of the line or the end of the header block) but flag it.
    loose = re.search(rb"^message-id:(.*)$", msg_header, flags)
    if loose is None:
        return (b"", False)
    return (loose.group(1).strip(), False)


def _quote_boundary_fix(self, raw_msg_bytes):
    """Modify a raw message to quote the boundary separator for multipart messages.

    This function quotes only the first occurrence of the boundary field in
    the Content-Type header of the message, and quotes any boundary value.
    Improperly quoted boundary fields can give the internal python email
    library issues.

    :param raw_msg_bytes: bytes object containing the raw email message.
    :returns: The raw byte stream containing the quoted boundary.  The input
              is returned unchanged when no boundary parameter is found,
              when it is already quoted, or when its value is empty.
    """
    # Search only the header block for the boundary parameter of the
    # Content-Type header, accepting the characters allowed by RFC 2046
    # ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ).
    # We don't cap the length to 70 characters, because we are just trying
    # to soft fix this message to resolve the python library looking for
    # properly quoted boundaries.
    #
    # The lazy group walks the Content-Type header and its folded
    # continuation lines only (a line break must be followed by a space or
    # tab), so a "boundary=" inside some unrelated later header is never
    # picked up, and the first boundary parameter wins.
    header = re.split(rb'\r?\n\r?\n', raw_msg_bytes, maxsplit=1)[0]
    match = re.search(
        rb"^content-type:(?:[^\r\n]|\r?\n[ \t])*?"
        rb"(boundary=\"?[A-Za-z0-9'()+_,\-./:=? ]+\"?)",
        header, re.IGNORECASE | re.MULTILINE)
    if match is None:
        # No boundary parameter - nothing to fix.
        return raw_msg_bytes

    # Strip trailing whitespace (against RFC rules; leading ws is OK).
    name, value = match.group(1).split(b'=', 1)
    value = value.rstrip()

    if len(value) >= 2 and value.startswith(b'"') and value.endswith(b'"'):
        # Sanity check - do not requote if already quoted.  A quoted
        # boundary was the end goal, so return the original.
        #
        # No need to worry about if the original email did something like:
        # boundary="ahahah " as the email library will trim the ws for us
        return raw_msg_bytes

    # Drop a stray, unbalanced quote so we never emit boundary=""abc".
    bare = value.strip(b'"')
    if not bare.strip():
        # Empty boundary value: there is nothing sensible to quote.
        return raw_msg_bytes

    fixed = b''.join([name, b'="', bare, b'"'])
    # The header is a prefix of the raw message, so the match offsets apply
    # to raw_msg_bytes directly; splice exactly the matched span.
    return raw_msg_bytes[:match.start(1)] + fixed + raw_msg_bytes[match.end(1):]
diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index 16d65da..c9318c2 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -24,6 +24,7 @@ from offlineimap import imaputil, imaplibutil, OfflineImapError from offlineimap import globals from imaplib2 import MonthNames from .Base import BaseFolder +from email.errors import NoBoundaryInMultipartDefect # Globals CRLF = '\r\n' @@ -735,10 +736,8 @@ class IMAPFolder(BaseFolder): raise OfflineImapError( "Saving msg (%s) in folder '%s', " "repository '%s' failed (abort). " - "Server responded: %s\n" - "Message content was: %s" % - (msg_id, self, self.getrepository(), - str(e), dbg_output), + "Server responded: %s\n" % + (msg_id, self, self.getrepository(), str(e)), OfflineImapError.ERROR.MESSAGE, exc_info()[2]) @@ -752,10 +751,8 @@ class IMAPFolder(BaseFolder): imapobj = None raise OfflineImapError( "Saving msg (%s) folder '%s', repo '%s'" - "failed (error). Server responded: %s\n" - "Message content was: %s" % - (msg_id, self, self.getrepository(), - str(e), dbg_output), + "failed (error). 
Server responded: %s\n" % + (msg_id, self, self.getrepository(), str(e)), OfflineImapError.ERROR.MESSAGE, exc_info()[2]) @@ -905,7 +902,35 @@ class IMAPFolder(BaseFolder): # Convert email, d[0][1], into a message object (from bytes) ndata0 = data[0][0].decode('utf-8') - ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) + try: ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) + except: + err = exc_info() + response_type = type(data[0][1]).__name__ + msg_id = self._extract_message_id(data[0][1])[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from imaplib (response type: {}).\n {}: {}".format( + msg_id, response_type, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) + if len(ndata1.defects) > 0: + # We don't automatically apply fixes as to attempt to preserve the original message + self.ui.warn("UID {} has defects: {}".format(uids, ndata1.defects)) + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in ndata1.defects): + # (Hopefully) Rare defect from a broken client where multipart boundary is + # not properly quoted. Attempt to solve by fixing the boundary and parsing + self.ui.warn(" ... 
applying multipart boundary fix.") + ndata1 = self.parser['8bit-RFC'].parsebytes(self._quote_boundary_fix(data[0][1])) + try: + # See if the defects after fixes are preventing us from obtaining bytes + _ = ndata1.as_bytes(policy=self.policy['8bit-RFC']) + except UnicodeEncodeError as err: + # Unknown issue which is causing failure of as_bytes() + msg_id = self.getmessageheader(ndata1, "message-id") + if msg_id is None: + msg_id = '' + raise OfflineImapError( + "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format( + uids, msg_id, type(err).__name__, err), + OfflineImapError.ERROR.MESSAGE) ndata = [ndata0, ndata1] return ndata diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 74a4ab2..3df4afa 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -25,6 +25,7 @@ from threading import Lock from hashlib import md5 from offlineimap import OfflineImapError from .Base import BaseFolder +from email.errors import NoBoundaryInMultipartDefect # Find the UID in a message filename re_uidmatch = re.compile(',U=(\d+)') @@ -259,8 +260,36 @@ class MaildirFolder(BaseFolder): filename = self.messagelist[uid]['filename'] filepath = os.path.join(self.getfullname(), filename) fd = open(filepath, 'rb') - retval = self.parser['8bit'].parse(fd) + _fd_bytes = fd.read() fd.close() + try: retval = self.parser['8bit'].parsebytes(_fd_bytes) + except: + err = exc_info() + msg_id = self._extract_message_id(_fd_bytes)[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from file ({}).\n {}: {}".format( + msg_id, filename, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) + if len(retval.defects) > 0: + # We don't automatically apply fixes as to attempt to preserve the original message + self.ui.warn("UID {} has defects: {}".format(uid, retval.defects)) + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): + # 
(Hopefully) Rare defect from a broken client where multipart boundary is + # not properly quoted. Attempt to solve by fixing the boundary and parsing + self.ui.warn(" ... applying multipart boundary fix.") + retval = self.parser['8bit'].parsebytes(self._quote_boundary_fix(_fd_bytes)) + try: + # See if the defects after fixes are preventing us from obtaining bytes + _ = retval.as_bytes(policy=self.policy['8bit']) + except UnicodeEncodeError as err: + # Unknown issue which is causing failure of as_bytes() + msg_id = self.getmessageheader(retval, "message-id") + if msg_id is None: + msg_id = '' + raise OfflineImapError( + "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format( + uid, msg_id, type(err).__name__, err), + OfflineImapError.ERROR.MESSAGE) return retval # Interface from BaseFolder