From 6a45eef3b5d915ed2b6d6077b93c0da35be4e05c Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 12 Apr 2021 22:58:58 -0400 Subject: [PATCH 01/10] Fixed a minor bug discovered if server is unreachable when debugging --- offlineimap/folder/IMAP.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index 16d65da..c133927 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -735,10 +735,8 @@ class IMAPFolder(BaseFolder): raise OfflineImapError( "Saving msg (%s) in folder '%s', " "repository '%s' failed (abort). " - "Server responded: %s\n" - "Message content was: %s" % - (msg_id, self, self.getrepository(), - str(e), dbg_output), + "Server responded: %s\n" % + (msg_id, self, self.getrepository(), str(e)), OfflineImapError.ERROR.MESSAGE, exc_info()[2]) @@ -752,10 +750,8 @@ class IMAPFolder(BaseFolder): imapobj = None raise OfflineImapError( "Saving msg (%s) folder '%s', repo '%s'" - "failed (error). Server responded: %s\n" - "Message content was: %s" % - (msg_id, self, self.getrepository(), - str(e), dbg_output), + "failed (error). Server responded: %s\n" % + (msg_id, self, self.getrepository(), str(e)), OfflineImapError.ERROR.MESSAGE, exc_info()[2]) From b78af7506442878c51c1e82f1ef36149539d06de Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Tue, 13 Apr 2021 00:01:26 -0400 Subject: [PATCH 02/10] Adding a handler to detect the unlikely edge case where a message may have an improperly quoted boundary that can cause the python library to fail to reproduce the original message with msg.as_bytes(). 
See: https://bugs.python.org/issue43818 and https://github.com/OfflineIMAP/offlineimap3/issues/62 --- offlineimap/folder/Base.py | 1 + offlineimap/folder/Maildir.py | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index f757e75..5f07960 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -26,6 +26,7 @@ from email import policy from email.parser import BytesParser from email.generator import BytesGenerator from email.utils import parsedate_tz, mktime_tz +from email.errors import NoBoundaryInMultipartDefect from offlineimap import threadutil from offlineimap.ui import getglobalui diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 74a4ab2..827d8ce 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -225,6 +225,44 @@ class MaildirFolder(BaseFolder): retval[uid] = date_excludees[uid] return retval + def _quote_boundary_fix(self, raw_msg_bytes): + """Modify a raw message to quote the boundary separator for multipart messages. + + This function quotes only the first occurrence of the boundary field in + the email header, and quotes any boundary value. Improperly quoted + boundary fields can give the internal python email library issues. + + :returns: The raw byte stream containing the quoted boundary + """ + # Use re.split to extract just the header, and search for the boundary in + # the context-type header and extract just the boundary and characters per + # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) + # We don't cap the length to 70 characters, because we are just trying to + # soft fix this message to resolve the python library looking for properly + # quoted boundaries. + try: boundary_field = \ + re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? 
]*[\"]?)", + re.split(b'[\r]?\n[\r]?\n',raw_msg_bytes)[0],re.IGNORECASE).group(1) + except AttributeError: + # No match + return raw_msg_bytes + # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) + # if it was already quoted, well then there was nothing to fix + boundary, value = boundary_field.split(b'=',1) + value = value.rstrip() + # ord(b'"') == 34 + if value[0] == value[-1] == 34: + # Sanity Check - Do not requote if already quoted. + # A quoted boundary was the end goal so return the original + # + # No need to worry about if the original email did something like: + # boundary="ahahah " as the email library will trim the ws for us + return raw_msg_bytes + else: + new_field = b''.join([boundary,b'="',value,b'"']) + return(raw_msg_bytes.replace(boundary_field,new_field,1)) + + # Interface from BaseFolder def quickchanged(self, statusfolder): """Returns True if the Maildir has changed @@ -260,6 +298,18 @@ class MaildirFolder(BaseFolder): filepath = os.path.join(self.getfullname(), filename) fd = open(filepath, 'rb') retval = self.parser['8bit'].parse(fd) + try: + _ = retval.as_bytes(policy=self.policy['8bit']) + except UnicodeEncodeError as err: + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): + # (Hopefully) Rare instance where multipart boundary is not + # properly quoted. 
Solve by fixing the boundary and parsing + fd.seek(0) + _buffer = fd.read() + retval = self.parser['8bit'].parsebytes(_quote_boundary_fix(_buffer)) + else: + # Unknown issue which is causing failure of as_bytes() + ui.warn("Message has defects preventing it from being processed!") fd.close() return retval From 0345390aa1b70d0b80bbccf8331ee592856508e5 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Wed, 14 Apr 2021 11:14:44 -0400 Subject: [PATCH 03/10] Fixed oversight in regular expression (boundary cannot be empty) --- offlineimap/folder/Maildir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 827d8ce..64527c4 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -241,7 +241,7 @@ class MaildirFolder(BaseFolder): # soft fix this message to resolve the python library looking for properly # quoted boundaries. try: boundary_field = \ - re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]*[\"]?)", + re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? 
]+[\"]?)", re.split(b'[\r]?\n[\r]?\n',raw_msg_bytes)[0],re.IGNORECASE).group(1) except AttributeError: # No match From f024bb9e4cf5b153e5f156af910254c24381e502 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Wed, 14 Apr 2021 11:19:22 -0400 Subject: [PATCH 04/10] Wrap bytes check in a wrapper to be a bit more efficient --- offlineimap/folder/Maildir.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 64527c4..53525de 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -299,7 +299,11 @@ class MaildirFolder(BaseFolder): fd = open(filepath, 'rb') retval = self.parser['8bit'].parse(fd) try: - _ = retval.as_bytes(policy=self.policy['8bit']) + if len(retval.defects) > 0: + ui.warn("Message has defects: {}".format(retval.defects)) + # See if the defects are preventing us from obtaining bytes and + # handle known issues + _ = retval.as_bytes(policy=self.policy['8bit']) except UnicodeEncodeError as err: if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): # (Hopefully) Rare instance where multipart boundary is not From a4532294ae65b0d731b02d10c9f123217c635352 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Wed, 14 Apr 2021 14:54:25 -0400 Subject: [PATCH 05/10] Correcting an issue where dbg_output was not defined when the server was unreachable due to an optimization in PR#56. Since message-id is more useful to better pinpoint the correct message, removing dbg_output. Also fixing https://github.com/OfflineIMAP/offlineimap3/issues/62 by correcting broken multipart boundaries or raising an error if as_bytes() fails. Related python bug submitted: https://bugs.python.org/issue43818 although this workaround should be sufficient in the interim. 
Signed-off-by: Joseph Ishac --- offlineimap/folder/Base.py | 1 - offlineimap/folder/Maildir.py | 40 ++++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index 5f07960..f757e75 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -26,7 +26,6 @@ from email import policy from email.parser import BytesParser from email.generator import BytesGenerator from email.utils import parsedate_tz, mktime_tz -from email.errors import NoBoundaryInMultipartDefect from offlineimap import threadutil from offlineimap.ui import getglobalui diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 53525de..baabc28 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -25,6 +25,7 @@ from threading import Lock from hashlib import md5 from offlineimap import OfflineImapError from .Base import BaseFolder +from email.errors import NoBoundaryInMultipartDefect # Find the UID in a message filename re_uidmatch = re.compile(',U=(\d+)') @@ -297,24 +298,29 @@ class MaildirFolder(BaseFolder): filename = self.messagelist[uid]['filename'] filepath = os.path.join(self.getfullname(), filename) fd = open(filepath, 'rb') - retval = self.parser['8bit'].parse(fd) - try: - if len(retval.defects) > 0: - ui.warn("Message has defects: {}".format(retval.defects)) - # See if the defects are preventing us from obtaining bytes and - # handle known issues - _ = retval.as_bytes(policy=self.policy['8bit']) - except UnicodeEncodeError as err: - if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): - # (Hopefully) Rare instance where multipart boundary is not - # properly quoted. 
Solve by fixing the boundary and parsing - fd.seek(0) - _buffer = fd.read() - retval = self.parser['8bit'].parsebytes(_quote_boundary_fix(_buffer)) - else: - # Unknown issue which is causing failure of as_bytes() - ui.warn("Message has defects preventing it from being processed!") + _fd_bytes = fd.read() fd.close() + retval = self.parser['8bit'].parsebytes(_fd_bytes) + if len(retval.defects) > 0: + # We don't automatically apply fixes as to attempt to preserve the original message + self.ui.warn("UID {} has defects: {}".format(uid, retval.defects)) + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): + # (Hopefully) Rare defect from a broken client where multipart boundary is + # not properly quoted. Attempt to solve by fixing the boundary and parsing + self.ui.warn(" ... applying multipart boundary fix.") + retval = self.parser['8bit'].parsebytes(self._quote_boundary_fix(_fd_bytes)) + try: + # See if the defects after fixes are preventing us from obtaining bytes + _ = retval.as_bytes(policy=self.policy['8bit']) + except UnicodeEncodeError as err: + # Unknown issue which is causing failure of as_bytes() + msg_id = self.getmessageheader(retval, "message-id") + if msg_id is None: + msg_id = '' + raise OfflineImapError( + "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format( + uid, msg_id, type(err).__name__, err), + OfflineImapError.ERROR.MESSAGE) return retval # Interface from BaseFolder From 84b96e1daa485d7fb5576f34c5c61254d4321e29 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Wed, 14 Apr 2021 17:08:16 -0400 Subject: [PATCH 06/10] Adding re.DOTALL to catch potentially folded lines and fixed formatting --- offlineimap/folder/Maildir.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index baabc28..50da5a2 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -243,13 +243,14 @@ class 
MaildirFolder(BaseFolder): # quoted boundaries. try: boundary_field = \ re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)", - re.split(b'[\r]?\n[\r]?\n',raw_msg_bytes)[0],re.IGNORECASE).group(1) + re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0], + (re.IGNORECASE|re.DOTALL)).group(1) except AttributeError: # No match return raw_msg_bytes # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) # if it was already quoted, well then there was nothing to fix - boundary, value = boundary_field.split(b'=',1) + boundary, value = boundary_field.split(b'=', 1) value = value.rstrip() # ord(b'"') == 34 if value[0] == value[-1] == 34: @@ -260,8 +261,8 @@ class MaildirFolder(BaseFolder): # boundary="ahahah " as the email library will trim the ws for us return raw_msg_bytes else: - new_field = b''.join([boundary,b'="',value,b'"']) - return(raw_msg_bytes.replace(boundary_field,new_field,1)) + new_field = b''.join([boundary, b'="', value, b'"']) + return(raw_msg_bytes.replace(boundary_field, new_field, 1)) # Interface from BaseFolder From b0aad911ab39fef1cb5c4ca626ae353bca326f18 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 7 Jun 2021 14:05:07 -0400 Subject: [PATCH 07/10] Adding some extra error handling when parsing from imaplib --- offlineimap/folder/IMAP.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index 16d65da..05c977f 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -905,7 +905,20 @@ class IMAPFolder(BaseFolder): # Convert email, d[0][1], into a message object (from bytes) ndata0 = data[0][0].decode('utf-8') - ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) + try: ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) + except: + e = exc_info() + response_type = type(data[0][1]).__name__ + try: msg_id = \ + re.search(b"message-id:.*(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)", + 
re.split(b'[\r]?\n[\r]?\n', bytes(data[0][1]))[0], re.IGNORECASE).group(1) + except AttributeError: + # No match + msg_id = b"" + raise OfflineImapError( + "Exception parsing message ({} type {}) from imaplib.\n {}: {}".format( + msg_id, response_type, e[0].__name__, e[1]), + OfflineImapError.ERROR.MESSAGE) ndata = [ndata0, ndata1] return ndata From 9bebcbe4f7f335e4b207243a38e26651234ac2df Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 7 Jun 2021 20:26:26 -0400 Subject: [PATCH 08/10] Adding utilities to Base class. Moving the quoted boundary fix to the Base class so that it can be used by any subclass that needs to read an email. Adding another utility to extract message-id from a raw email. --- offlineimap/folder/Base.py | 70 +++++++++++++++++++++++++++++++++++ offlineimap/folder/Maildir.py | 39 ------------------- 2 files changed, 70 insertions(+), 39 deletions(-) diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index f757e75..58f4fcd 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -841,6 +841,76 @@ class BaseFolder: (uid, self.accountname)) raise # Raise on unknown errors, so we can fix those. + def _extract_message_id(self, raw_msg_bytes): + """Extract the Message-ID from a bytes object containing a raw message. + + This function attempts to find the Message-ID for a message that has not + been processed by the built-in email library, and is therefore NOT an + email object. If parsing the message fails (or is otherwise not + needed), this utility can be useful to help provide a (hopefully) unique + identifier in log messages to facilitate locating the message on disk. + + :param raw_msg_bytes: bytes object containing the raw email message. + :returns: A tuple containing the contents of the Message-ID header if + found (or if not found) and a flag which is True if + the Message-ID was in proper RFC format or False if it contained + defects. 
+ """ + msg_header = re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0] + try: + msg_id = re.search(b"\nmessage-id:[\s]+(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)", + msg_header, re.IGNORECASE).group(1) + except AttributeError: + # No match - Likely not following RFC rules. Try and find anything + # that looks like it could be the Message-ID but flag it. + _start_pos = msg_header.find(b'\nMessage-ID:') + if _start_pos > 0: + _end_pos = msg_header.find(b'\n',_start_pos+15) + msg_id = msg_header[_start_pos+12:_end_pos].strip() + return (msg_id, False) + else: + return (b"", False) + return (msg_id, True) + + def _quote_boundary_fix(self, raw_msg_bytes): + """Modify a raw message to quote the boundary separator for multipart messages. + + This function quotes only the first occurrence of the boundary field in + the email header, and quotes any boundary value. Improperly quoted + boundary fields can give the internal python email library issues. + + :param raw_msg_bytes: bytes object containing the raw email message. + :returns: The raw byte stream containing the quoted boundary + """ + # Use re.split to extract just the header, and search for the boundary in + # the context-type header and extract just the boundary and characters per + # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) + # We don't cap the length to 70 characters, because we are just trying to + # soft fix this message to resolve the python library looking for properly + # quoted boundaries. + try: boundary_field = \ + re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? 
]+[\"]?)", + re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0], + (re.IGNORECASE|re.DOTALL)).group(1) + except AttributeError: + # No match + return raw_msg_bytes + # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) + # if it was already quoted, well then there was nothing to fix + boundary, value = boundary_field.split(b'=', 1) + value = value.rstrip() + # ord(b'"') == 34 + if value[0] == value[-1] == 34: + # Sanity Check - Do not requote if already quoted. + # A quoted boundary was the end goal so return the original + # + # No need to worry about if the original email did something like: + # boundary="ahahah " as the email library will trim the ws for us + return raw_msg_bytes + else: + new_field = b''.join([boundary, b'="', value, b'"']) + return(raw_msg_bytes.replace(boundary_field, new_field, 1)) + def __syncmessagesto_copy(self, dstfolder, statusfolder): """Pass1: Copy locally existing messages not on the other side. diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 50da5a2..92ac124 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -226,45 +226,6 @@ class MaildirFolder(BaseFolder): retval[uid] = date_excludees[uid] return retval - def _quote_boundary_fix(self, raw_msg_bytes): - """Modify a raw message to quote the boundary separator for multipart messages. - - This function quotes only the first occurrence of the boundary field in - the email header, and quotes any boundary value. Improperly quoted - boundary fields can give the internal python email library issues. 
- - :returns: The raw byte stream containing the quoted boundary - """ - # Use re.split to extract just the header, and search for the boundary in - # the context-type header and extract just the boundary and characters per - # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) - # We don't cap the length to 70 characters, because we are just trying to - # soft fix this message to resolve the python library looking for properly - # quoted boundaries. - try: boundary_field = \ - re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)", - re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0], - (re.IGNORECASE|re.DOTALL)).group(1) - except AttributeError: - # No match - return raw_msg_bytes - # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) - # if it was already quoted, well then there was nothing to fix - boundary, value = boundary_field.split(b'=', 1) - value = value.rstrip() - # ord(b'"') == 34 - if value[0] == value[-1] == 34: - # Sanity Check - Do not requote if already quoted. 
- # A quoted boundary was the end goal so return the original - # - # No need to worry about if the original email did something like: - # boundary="ahahah " as the email library will trim the ws for us - return raw_msg_bytes - else: - new_field = b''.join([boundary, b'="', value, b'"']) - return(raw_msg_bytes.replace(boundary_field, new_field, 1)) - - # Interface from BaseFolder def quickchanged(self, statusfolder): """Returns True if the Maildir has changed From 5b976fc5da659812942073664b5f9ca8ec81f649 Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 7 Jun 2021 21:57:54 -0400 Subject: [PATCH 09/10] Making error handling when parsing messages consistent --- offlineimap/folder/IMAP.py | 39 ++++++++++++++++++++++++----------- offlineimap/folder/Maildir.py | 9 +++++++- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index 764f428..bfd232e 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -903,18 +903,33 @@ class IMAPFolder(BaseFolder): ndata0 = data[0][0].decode('utf-8') try: ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) except: - e = exc_info() - response_type = type(data[0][1]).__name__ - try: msg_id = \ - re.search(b"message-id:.*(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)", - re.split(b'[\r]?\n[\r]?\n', bytes(data[0][1]))[0], re.IGNORECASE).group(1) - except AttributeError: - # No match - msg_id = b"" - raise OfflineImapError( - "Exception parsing message ({} type {}) from imaplib.\n {}: {}".format( - msg_id, response_type, e[0].__name__, e[1]), - OfflineImapError.ERROR.MESSAGE) + err = exc_info() + response_type = type(data[0][1]).__name__ + msg_id = self._extract_message_id(data[0][1])[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from imaplib (response type: {}).\n {}: {}".format( + msg_id, response_type, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) + if len(ndata1.defects) 
> 0: + # We don't automatically apply fixes as to attempt to preserve the original message + self.ui.warn("UID {} has defects: {}".format(uids, ndata1.defects)) + if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in ndata1.defects): + # (Hopefully) Rare defect from a broken client where multipart boundary is + # not properly quoted. Attempt to solve by fixing the boundary and parsing + self.ui.warn(" ... applying multipart boundary fix.") + ndata1 = self.parser['8bit-RFC'].parsebytes(self._quote_boundary_fix(data[0][1])) + try: + # See if the defects after fixes are preventing us from obtaining bytes + _ = ndata1.as_bytes(policy=self.policy['8bit-RFC']) + except UnicodeEncodeError as err: + # Unknown issue which is causing failure of as_bytes() + msg_id = self.getmessageheader(ndata1, "message-id") + if msg_id is None: + msg_id = '' + raise OfflineImapError( + "UID {} ({}) has defects preventing it from being processed!\n {}: {}".format( + uids, msg_id, type(err).__name__, err), + OfflineImapError.ERROR.MESSAGE) ndata = [ndata0, ndata1] return ndata diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 92ac124..3df4afa 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -262,7 +262,14 @@ class MaildirFolder(BaseFolder): fd = open(filepath, 'rb') _fd_bytes = fd.read() fd.close() - retval = self.parser['8bit'].parsebytes(_fd_bytes) + try: retval = self.parser['8bit'].parsebytes(_fd_bytes) + except: + err = exc_info() + msg_id = self._extract_message_id(_fd_bytes)[0].decode('ascii',errors='surrogateescape') + raise OfflineImapError( + "Exception parsing message with ID ({}) from file ({}).\n {}: {}".format( + msg_id, filename, err[0].__name__, err[1]), + OfflineImapError.ERROR.MESSAGE) if len(retval.defects) > 0: # We don't automatically apply fixes as to attempt to preserve the original message self.ui.warn("UID {} has defects: {}".format(uid, retval.defects)) From 
9e0fb59bdf325044a535d14d354e3887cc1d6e1b Mon Sep 17 00:00:00 2001 From: Joseph Ishac Date: Mon, 7 Jun 2021 22:18:31 -0400 Subject: [PATCH 10/10] Adding missing import of NoBoundaryInMultipartDefect --- offlineimap/folder/IMAP.py | 1 + 1 file changed, 1 insertion(+) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index bfd232e..c9318c2 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -24,6 +24,7 @@ from offlineimap import imaputil, imaplibutil, OfflineImapError from offlineimap import globals from imaplib2 import MonthNames from .Base import BaseFolder +from email.errors import NoBoundaryInMultipartDefect # Globals CRLF = '\r\n'