Adding utilities to Base class.
Moving the quoted boundary fix to the Base class so that it can be used by any subclass that needs to read an email. Adding another utility to extract message-id from a raw email.
This commit is contained in:
parent
b4f100c92c
commit
9bebcbe4f7
@ -841,6 +841,76 @@ class BaseFolder:
|
|||||||
(uid, self.accountname))
|
(uid, self.accountname))
|
||||||
raise # Raise on unknown errors, so we can fix those.
|
raise # Raise on unknown errors, so we can fix those.
|
||||||
|
|
||||||
|
def _extract_message_id(self, raw_msg_bytes):
|
||||||
|
"""Extract the Message-ID from a bytes object containing a raw message.
|
||||||
|
|
||||||
|
This function attempts to find the Message-ID for a message that has not
|
||||||
|
been processed by the built-in email library, and is therefore NOT an
|
||||||
|
email object. If parsing the message fails (or is otherwise not
|
||||||
|
needed), this utility can be useful to help provide a (hopefully) unique
|
||||||
|
identifier in log messages to facilitate locating the message on disk.
|
||||||
|
|
||||||
|
:param raw_msg_bytes: bytes object containing the raw email message.
|
||||||
|
:returns: A tuple containing the contents of the Message-ID header if
|
||||||
|
found (or <Unknown Message-ID> if not found) and a flag which is True if
|
||||||
|
the Message-ID was in proper RFC format or False if it contained
|
||||||
|
defects.
|
||||||
|
"""
|
||||||
|
msg_header = re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0]
|
||||||
|
try:
|
||||||
|
msg_id = re.search(b"\nmessage-id:[\s]+(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)",
|
||||||
|
msg_header, re.IGNORECASE).group(1)
|
||||||
|
except AttributeError:
|
||||||
|
# No match - Likely not following RFC rules. Try and find anything
|
||||||
|
# that looks like it could be the Message-ID but flag it.
|
||||||
|
_start_pos = msg_header.find(b'\nMessage-ID:')
|
||||||
|
if _start_pos > 0:
|
||||||
|
_end_pos = msg_header.find(b'\n',_start_pos+15)
|
||||||
|
msg_id = msg_header[_start_pos+12:_end_pos].strip()
|
||||||
|
return (msg_id, False)
|
||||||
|
else:
|
||||||
|
return (b"<Unknown Message-ID>", False)
|
||||||
|
return (msg_id, True)
|
||||||
|
|
||||||
|
def _quote_boundary_fix(self, raw_msg_bytes):
|
||||||
|
"""Modify a raw message to quote the boundary separator for multipart messages.
|
||||||
|
|
||||||
|
This function quotes only the first occurrence of the boundary field in
|
||||||
|
the email header, and quotes any boundary value. Improperly quoted
|
||||||
|
boundary fields can give the internal python email library issues.
|
||||||
|
|
||||||
|
:param raw_msg_bytes: bytes object containing the raw email message.
|
||||||
|
:returns: The raw byte stream containing the quoted boundary
|
||||||
|
"""
|
||||||
|
# Use re.split to extract just the header, and search for the boundary in
|
||||||
|
# the context-type header and extract just the boundary and characters per
|
||||||
|
# RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 )
|
||||||
|
# We don't cap the length to 70 characters, because we are just trying to
|
||||||
|
# soft fix this message to resolve the python library looking for properly
|
||||||
|
# quoted boundaries.
|
||||||
|
try: boundary_field = \
|
||||||
|
re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)",
|
||||||
|
re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0],
|
||||||
|
(re.IGNORECASE|re.DOTALL)).group(1)
|
||||||
|
except AttributeError:
|
||||||
|
# No match
|
||||||
|
return raw_msg_bytes
|
||||||
|
# get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK)
|
||||||
|
# if it was already quoted, well then there was nothing to fix
|
||||||
|
boundary, value = boundary_field.split(b'=', 1)
|
||||||
|
value = value.rstrip()
|
||||||
|
# ord(b'"') == 34
|
||||||
|
if value[0] == value[-1] == 34:
|
||||||
|
# Sanity Check - Do not requote if already quoted.
|
||||||
|
# A quoted boundary was the end goal so return the original
|
||||||
|
#
|
||||||
|
# No need to worry about if the original email did something like:
|
||||||
|
# boundary="ahahah " as the email library will trim the ws for us
|
||||||
|
return raw_msg_bytes
|
||||||
|
else:
|
||||||
|
new_field = b''.join([boundary, b'="', value, b'"'])
|
||||||
|
return(raw_msg_bytes.replace(boundary_field, new_field, 1))
|
||||||
|
|
||||||
def __syncmessagesto_copy(self, dstfolder, statusfolder):
|
def __syncmessagesto_copy(self, dstfolder, statusfolder):
|
||||||
"""Pass1: Copy locally existing messages not on the other side.
|
"""Pass1: Copy locally existing messages not on the other side.
|
||||||
|
|
||||||
|
@ -226,45 +226,6 @@ class MaildirFolder(BaseFolder):
|
|||||||
retval[uid] = date_excludees[uid]
|
retval[uid] = date_excludees[uid]
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
def _quote_boundary_fix(self, raw_msg_bytes):
|
|
||||||
"""Modify a raw message to quote the boundary separator for multipart messages.
|
|
||||||
|
|
||||||
This function quotes only the first occurrence of the boundary field in
|
|
||||||
the email header, and quotes any boundary value. Improperly quoted
|
|
||||||
boundary fields can give the internal python email library issues.
|
|
||||||
|
|
||||||
:returns: The raw byte stream containing the quoted boundary
|
|
||||||
"""
|
|
||||||
# Use re.split to extract just the header, and search for the boundary in
|
|
||||||
# the context-type header and extract just the boundary and characters per
|
|
||||||
# RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 )
|
|
||||||
# We don't cap the length to 70 characters, because we are just trying to
|
|
||||||
# soft fix this message to resolve the python library looking for properly
|
|
||||||
# quoted boundaries.
|
|
||||||
try: boundary_field = \
|
|
||||||
re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)",
|
|
||||||
re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0],
|
|
||||||
(re.IGNORECASE|re.DOTALL)).group(1)
|
|
||||||
except AttributeError:
|
|
||||||
# No match
|
|
||||||
return raw_msg_bytes
|
|
||||||
# get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK)
|
|
||||||
# if it was already quoted, well then there was nothing to fix
|
|
||||||
boundary, value = boundary_field.split(b'=', 1)
|
|
||||||
value = value.rstrip()
|
|
||||||
# ord(b'"') == 34
|
|
||||||
if value[0] == value[-1] == 34:
|
|
||||||
# Sanity Check - Do not requote if already quoted.
|
|
||||||
# A quoted boundary was the end goal so return the original
|
|
||||||
#
|
|
||||||
# No need to worry about if the original email did something like:
|
|
||||||
# boundary="ahahah " as the email library will trim the ws for us
|
|
||||||
return raw_msg_bytes
|
|
||||||
else:
|
|
||||||
new_field = b''.join([boundary, b'="', value, b'"'])
|
|
||||||
return(raw_msg_bytes.replace(boundary_field, new_field, 1))
|
|
||||||
|
|
||||||
|
|
||||||
# Interface from BaseFolder
|
# Interface from BaseFolder
|
||||||
def quickchanged(self, statusfolder):
|
def quickchanged(self, statusfolder):
|
||||||
"""Returns True if the Maildir has changed
|
"""Returns True if the Maildir has changed
|
||||||
|
Loading…
Reference in New Issue
Block a user