Add a handler to detect the unlikely edge case where a message has
an improperly quoted multipart boundary, which can cause the Python email library to fail to reproduce the original message with msg.as_bytes(). See: https://bugs.python.org/issue43818 and https://github.com/OfflineIMAP/offlineimap3/issues/62
This commit is contained in:
		| @@ -225,6 +225,44 @@ class MaildirFolder(BaseFolder): | ||||
|                         retval[uid] = date_excludees[uid] | ||||
|         return retval | ||||
|  | ||||
|     def _quote_boundary_fix(self, raw_msg_bytes): | ||||
|         """Modify a raw message to quote the boundary separator for multipart messages. | ||||
|  | ||||
|         This function quotes only the first occurrence of the boundary field in | ||||
|         the email header, and quotes any boundary value.  Improperly quoted | ||||
|         boundary fields can give the internal python email library issues. | ||||
|  | ||||
|         :returns: The raw byte stream containing the quoted boundary | ||||
|         """ | ||||
|         # Use re.split to extract just the header, and search for the boundary in | ||||
|         # the context-type header and extract just the boundary and characters per | ||||
|         # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) | ||||
|         # We don't cap the length to 70 characters, because we are just trying to | ||||
|         # soft fix this message to resolve the python library looking for properly | ||||
|         # quoted boundaries. | ||||
|         try: boundary_field = \ | ||||
|             re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]*[\"]?)", | ||||
|             re.split(b'[\r]?\n[\r]?\n',raw_msg_bytes)[0],re.IGNORECASE).group(1) | ||||
|         except AttributeError: | ||||
|             # No match | ||||
|             return raw_msg_bytes | ||||
|         # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) | ||||
|         # if it was already quoted, well then there was nothing to fix | ||||
|         boundary, value = boundary_field.split(b'=',1) | ||||
|         value = value.rstrip() | ||||
|         # ord(b'"') == 34 | ||||
|         if value[0] == value[-1] == 34: | ||||
|             # Sanity Check - Do not requote if already quoted. | ||||
|             # A quoted boundary was the end goal so return the original | ||||
|             # | ||||
|             # No need to worry about if the original email did something like: | ||||
|             # boundary="ahahah  " as the email library will trim the ws for us | ||||
|             return raw_msg_bytes | ||||
|         else: | ||||
|             new_field = b''.join([boundary,b'="',value,b'"']) | ||||
|             return(raw_msg_bytes.replace(boundary_field,new_field,1)) | ||||
|  | ||||
|  | ||||
|     # Interface from BaseFolder | ||||
|     def quickchanged(self, statusfolder): | ||||
|         """Returns True if the Maildir has changed | ||||
| @@ -260,6 +298,18 @@ class MaildirFolder(BaseFolder): | ||||
|         filepath = os.path.join(self.getfullname(), filename) | ||||
|         fd = open(filepath, 'rb') | ||||
|         retval = self.parser['8bit'].parse(fd) | ||||
|         try: | ||||
|             _ = retval.as_bytes(policy=self.policy['8bit']) | ||||
|         except UnicodeEncodeError as err: | ||||
|             if any(isinstance(defect, NoBoundaryInMultipartDefect) for defect in retval.defects): | ||||
|                 # (Hopefully) Rare instance where multipart boundary is not | ||||
|                 # properly quoted.  Solve by fixing the boundary and parsing | ||||
|                 fd.seek(0) | ||||
|                 _buffer = fd.read() | ||||
|                 retval = self.parser['8bit'].parsebytes(_quote_boundary_fix(_buffer)) | ||||
|             else: | ||||
|                 # Unknown issue which is causing failure of as_bytes() | ||||
|                 ui.warn("Message has defects preventing it from being processed!") | ||||
|         fd.close() | ||||
|         return retval | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Joseph Ishac
					Joseph Ishac