From 5b976fc5da659812942073664b5f9ca8ec81f649 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 7 Jun 2021 21:57:54 -0400 Subject: [PATCH] Making error handling when parsing messages consistent --- offlineimap/folder/IMAP.py | 39 ++++++++++++++++++++++++----------- offlineimap/folder/Maildir.py | 9 +++++++- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index 764f428..bfd232e 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -903,18 +903,33 @@ class IMAPFolder(BaseFolder): ndata0 = data[0][0].decode('utf-8') try: ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) except: - e = exc_info() - response_type = type(data[0][1]).__name__ - try: msg_id = \ - re.search(b"message-id:.*(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)", - re.split(b'[\r]?\n[\r]?\n', bytes(data[0][1]))[0], re.IGNORECASE).group(1) - except AttributeError: - # No match - msg_id = b"" - raise OfflineImapError( - "Exception parsing message ({} type {}) from imaplib.\n {}: {}".format( - msg_id, response_type, e[0].__name__, e[1]), - OfflineImapError.ERROR.MESSAGE) + err = exc_info() + response_type = type(data[0][1]).__name__ + msg_id = self._extract_message_id(data[0][1])[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from imaplib (response type: {}).\n {}: {}".format( + msg_id, response_type, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) + if len(ndata1.defects) > 0: + # We don't automatically apply fixes as to attempt to preserve the original message + self.ui.warn("UID {} has defects: {}".format(uids, ndata1.defects)) + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in ndata1.defects): + # (Hopefully) Rare defect from a broken client where multipart boundary is + # not properly quoted. Attempt to solve by fixing the boundary and parsing + self.ui.warn(" ... applying multipart boundary fix.") + ndata1 = self.parser['8bit-RFC'].parsebytes(self._quote_boundary_fix(data[0][1])) + try: + # See if the defects after fixes are preventing us from obtaining bytes + _ = ndata1.as_bytes(policy=self.policy['8bit-RFC']) + except UnicodeEncodeError as err: + # Unknown issue which is causing failure of as_bytes() + msg_id = self.getmessageheader(ndata1, "message-id") + if msg_id is None: + msg_id = '' + raise OfflineImapError( + "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format( + uids, msg_id, type(err).__name__, err), + OfflineImapError.ERROR.MESSAGE) ndata = [ndata0, ndata1] return ndata diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 92ac124..3df4afa 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -262,7 +262,14 @@ class MaildirFolder(BaseFolder): fd = open(filepath, 'rb') _fd_bytes = fd.read() fd.close() - retval = self.parser['8bit'].parsebytes(_fd_bytes) + try: retval = self.parser['8bit'].parsebytes(_fd_bytes) + except: + err = exc_info() + msg_id = self._extract_message_id(_fd_bytes)[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from file ({}).\n {}: {}".format( + msg_id, filename, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) if len(retval.defects) > 0: # We don't automatically apply fixes as to attempt to preserve the original message self.ui.warn("UID {} has defects: {}".format(uid, retval.defects))