Add a handler for the unlikely edge case where a message has an improperly quoted multipart boundary.
Such a boundary can cause the Python email library to fail to reproduce the original message with msg.as_bytes(). See: https://bugs.python.org/issue43818 and https://github.com/OfflineIMAP/offlineimap3/issues/62
This commit is contained in:
parent
6a45eef3b5
commit
b78af75064
@ -26,6 +26,7 @@ from email import policy
|
|||||||
from email.parser import BytesParser
|
from email.parser import BytesParser
|
||||||
from email.generator import BytesGenerator
|
from email.generator import BytesGenerator
|
||||||
from email.utils import parsedate_tz, mktime_tz
|
from email.utils import parsedate_tz, mktime_tz
|
||||||
|
from email.errors import NoBoundaryInMultipartDefect
|
||||||
|
|
||||||
from offlineimap import threadutil
|
from offlineimap import threadutil
|
||||||
from offlineimap.ui import getglobalui
|
from offlineimap.ui import getglobalui
|
||||||
|
@ -225,6 +225,44 @@ class MaildirFolder(BaseFolder):
|
|||||||
retval[uid] = date_excludees[uid]
|
retval[uid] = date_excludees[uid]
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
def _quote_boundary_fix(self, raw_msg_bytes):
    """Modify a raw message to quote the boundary separator for multipart messages.

    Only the header section (everything before the first blank line) is
    searched, and only the first boundary parameter of the Content-Type
    header is quoted. Improperly quoted boundary fields can give the
    internal python email library issues.

    The message is returned unchanged when there is no Content-Type
    boundary parameter in the header, when the boundary value is empty,
    or when the value already starts with a double quote.

    :param raw_msg_bytes: The complete raw message, as bytes
    :returns: The raw byte stream containing the quoted boundary
    """
    # Restrict the search to the header section. The header is a prefix of
    # raw_msg_bytes, so match offsets found in it are valid offsets into the
    # full message as well.
    header = re.split(b'\r?\n\r?\n', raw_msg_bytes, maxsplit=1)[0]

    # Find the boundary parameter of the content-type header and extract just
    # the boundary and characters per RFC 2046
    # ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 )
    # We don't cap the length to 70 characters, because we are just trying to
    # soft fix this message to resolve the python library looking for properly
    # quoted boundaries.
    #
    # The header name is anchored to the start of a line so that names which
    # merely end in "content-type:" do not match. The skipped prefix may run
    # across folded continuation lines (a line break followed by a space or
    # tab) but never into the next header, and it is non-greedy so the first
    # boundary parameter is the one selected.
    match = re.search(
        rb"^content-type:(?:[^\r\n]|\r?\n[ \t])*?"
        rb"(boundary=\"?[A-Za-z0-9'()+_,\-./:=? ]*\"?)",
        header,
        re.IGNORECASE | re.MULTILINE)
    if match is None:
        # No boundary parameter, nothing to fix
        return raw_msg_bytes

    # Split the field into its name and value, and strip off any trailing ws
    # from the value (against RFC rules, leading ws is OK)
    boundary, value = match.group(1).split(b'=', 1)
    value = value.rstrip()

    if not value:
        # Empty boundary value: quoting it would not produce anything usable
        return raw_msg_bytes

    if value.startswith(b'"'):
        # Sanity Check - Do not requote if already quoted.
        # A quoted boundary was the end goal so return the original.
        # This also covers a quoted value containing characters outside the
        # set matched above (the match then stops before the closing quote);
        # adding quotes to such a partial match would corrupt the header.
        #
        # No need to worry about if the original email did something like:
        # boundary="ahahah " as the email library will trim the ws for us
        return raw_msg_bytes

    new_field = b''.join([boundary, b'="', value, b'"'])
    # Splice by offset so that exactly the matched parameter is rewritten,
    # even if the same bytes also occur earlier in the message.
    start, end = match.span(1)
    return raw_msg_bytes[:start] + new_field + raw_msg_bytes[end:]
|
||||||
|
|
||||||
|
|
||||||
# Interface from BaseFolder
|
# Interface from BaseFolder
|
||||||
def quickchanged(self, statusfolder):
|
def quickchanged(self, statusfolder):
|
||||||
"""Returns True if the Maildir has changed
|
"""Returns True if the Maildir has changed
|
||||||
@ -260,6 +298,18 @@ class MaildirFolder(BaseFolder):
|
|||||||
filepath = os.path.join(self.getfullname(), filename)
|
filepath = os.path.join(self.getfullname(), filename)
|
||||||
fd = open(filepath, 'rb')
|
fd = open(filepath, 'rb')
|
||||||
retval = self.parser['8bit'].parse(fd)
|
retval = self.parser['8bit'].parse(fd)
|
||||||
|
try:
|
||||||
|
_ = retval.as_bytes(policy=self.policy['8bit'])
|
||||||
|
except UnicodeEncodeError as err:
|
||||||
|
if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects):
|
||||||
|
# (Hopefully) Rare instance where multipart boundary is not
|
||||||
|
# properly quoted. Solve by fixing the boundary and parsing
|
||||||
|
fd.seek(0)
|
||||||
|
_buffer = fd.read()
|
||||||
|
retval = self.parser['8bit'].parsebytes(_quote_boundary_fix(_buffer))
|
||||||
|
else:
|
||||||
|
# Unknown issue which is causing failure of as_bytes()
|
||||||
|
ui.warn("Message has defects preventing it from being processed!")
|
||||||
fd.close()
|
fd.close()
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user