Add a handler for the unlikely edge case where a message has an
improperly quoted multipart boundary, which can cause the Python email library to fail to reproduce the original message with msg.as_bytes(). See: https://bugs.python.org/issue43818 and https://github.com/OfflineIMAP/offlineimap3/issues/62
This commit is contained in:
parent
6a45eef3b5
commit
b78af75064
@ -26,6 +26,7 @@ from email import policy
|
||||
from email.parser import BytesParser
|
||||
from email.generator import BytesGenerator
|
||||
from email.utils import parsedate_tz, mktime_tz
|
||||
from email.errors import NoBoundaryInMultipartDefect
|
||||
|
||||
from offlineimap import threadutil
|
||||
from offlineimap.ui import getglobalui
|
||||
|
@ -225,6 +225,44 @@ class MaildirFolder(BaseFolder):
|
||||
retval[uid] = date_excludees[uid]
|
||||
return retval
|
||||
|
||||
def _quote_boundary_fix(self, raw_msg_bytes):
|
||||
"""Modify a raw message to quote the boundary separator for multipart messages.
|
||||
|
||||
This function quotes only the first occurrence of the boundary field in
|
||||
the email header, and quotes any boundary value. Improperly quoted
|
||||
boundary fields can give the internal python email library issues.
|
||||
|
||||
:returns: The raw byte stream containing the quoted boundary
|
||||
"""
|
||||
# Use re.split to extract just the header, and search for the boundary in
|
||||
# the context-type header and extract just the boundary and characters per
|
||||
# RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 )
|
||||
# We don't cap the length to 70 characters, because we are just trying to
|
||||
# soft fix this message to resolve the python library looking for properly
|
||||
# quoted boundaries.
|
||||
try: boundary_field = \
|
||||
re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]*[\"]?)",
|
||||
re.split(b'[\r]?\n[\r]?\n',raw_msg_bytes)[0],re.IGNORECASE).group(1)
|
||||
except AttributeError:
|
||||
# No match
|
||||
return raw_msg_bytes
|
||||
# get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK)
|
||||
# if it was already quoted, well then there was nothing to fix
|
||||
boundary, value = boundary_field.split(b'=',1)
|
||||
value = value.rstrip()
|
||||
# ord(b'"') == 34
|
||||
if value[0] == value[-1] == 34:
|
||||
# Sanity Check - Do not requote if already quoted.
|
||||
# A quoted boundary was the end goal so return the original
|
||||
#
|
||||
# No need to worry about if the original email did something like:
|
||||
# boundary="ahahah " as the email library will trim the ws for us
|
||||
return raw_msg_bytes
|
||||
else:
|
||||
new_field = b''.join([boundary,b'="',value,b'"'])
|
||||
return(raw_msg_bytes.replace(boundary_field,new_field,1))
|
||||
|
||||
|
||||
# Interface from BaseFolder
|
||||
def quickchanged(self, statusfolder):
|
||||
"""Returns True if the Maildir has changed
|
||||
@ -260,6 +298,18 @@ class MaildirFolder(BaseFolder):
|
||||
filepath = os.path.join(self.getfullname(), filename)
|
||||
fd = open(filepath, 'rb')
|
||||
retval = self.parser['8bit'].parse(fd)
|
||||
try:
|
||||
_ = retval.as_bytes(policy=self.policy['8bit'])
|
||||
except UnicodeEncodeError as err:
|
||||
if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects):
|
||||
# (Hopefully) Rare instance where multipart boundary is not
|
||||
# properly quoted. Solve by fixing the boundary and parsing
|
||||
fd.seek(0)
|
||||
_buffer = fd.read()
|
||||
retval = self.parser['8bit'].parsebytes(_quote_boundary_fix(_buffer))
|
||||
else:
|
||||
# Unknown issue which is causing failure of as_bytes()
|
||||
ui.warn("Message has defects preventing it from being processed!")
|
||||
fd.close()
|
||||
return retval
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user