diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index f757e75..58f4fcd 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -841,6 +841,76 @@ class BaseFolder: (uid, self.accountname)) raise # Raise on unknown errors, so we can fix those. + def _extract_message_id(self, raw_msg_bytes): + """Extract the Message-ID from a bytes object containing a raw message. + + This function attempts to find the Message-ID for a message that has not + been processed by the built-in email library, and is therefore NOT an + email object. If parsing the message fails (or is otherwise not + needed), this utility can be useful to help provide a (hopefully) unique + identifier in log messages to facilitate locating the message on disk. + + :param raw_msg_bytes: bytes object containing the raw email message. + :returns: A tuple containing the contents of the Message-ID header if + found (or if not found) and a flag which is True if + the Message-ID was in proper RFC format or False if it contained + defects. + """ + msg_header = re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0] + try: + msg_id = re.search(b"\nmessage-id:[\s]+(<[A-Za-z0-9!#$%&'*+-/=?^_`{}|~.@ ]+>)", + msg_header, re.IGNORECASE).group(1) + except AttributeError: + # No match - Likely not following RFC rules. Try and find anything + # that looks like it could be the Message-ID but flag it. + _start_pos = msg_header.find(b'\nMessage-ID:') + if _start_pos > 0: + _end_pos = msg_header.find(b'\n',_start_pos+15) + msg_id = msg_header[_start_pos+12:_end_pos].strip() + return (msg_id, False) + else: + return (b"", False) + return (msg_id, True) + + def _quote_boundary_fix(self, raw_msg_bytes): + """Modify a raw message to quote the boundary separator for multipart messages. + + This function quotes only the first occurrence of the boundary field in + the email header, and quotes any boundary value. Improperly quoted + boundary fields can give the internal python email library issues. + + :param raw_msg_bytes: bytes object containing the raw email message. + :returns: The raw byte stream containing the quoted boundary + """ + # Use re.split to extract just the header, and search for the boundary in + # the context-type header and extract just the boundary and characters per + # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) + # We don't cap the length to 70 characters, because we are just trying to + # soft fix this message to resolve the python library looking for properly + # quoted boundaries. + try: boundary_field = \ + re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)", + re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0], + (re.IGNORECASE|re.DOTALL)).group(1) + except AttributeError: + # No match + return raw_msg_bytes + # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) + # if it was already quoted, well then there was nothing to fix + boundary, value = boundary_field.split(b'=', 1) + value = value.rstrip() + # ord(b'"') == 34 + if value[0] == value[-1] == 34: + # Sanity Check - Do not requote if already quoted. + # A quoted boundary was the end goal so return the original + # + # No need to worry about if the original email did something like: + # boundary="ahahah " as the email library will trim the ws for us + return raw_msg_bytes + else: + new_field = b''.join([boundary, b'="', value, b'"']) + return(raw_msg_bytes.replace(boundary_field, new_field, 1)) + def __syncmessagesto_copy(self, dstfolder, statusfolder): """Pass1: Copy locally existing messages not on the other side. diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 50da5a2..92ac124 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -226,45 +226,6 @@ class MaildirFolder(BaseFolder): retval[uid] = date_excludees[uid] return retval - def _quote_boundary_fix(self, raw_msg_bytes): - """Modify a raw message to quote the boundary separator for multipart messages. - - This function quotes only the first occurrence of the boundary field in - the email header, and quotes any boundary value. Improperly quoted - boundary fields can give the internal python email library issues. - - :returns: The raw byte stream containing the quoted boundary - """ - # Use re.split to extract just the header, and search for the boundary in - # the context-type header and extract just the boundary and characters per - # RFC 2046 ( see https://tools.ietf.org/html/rfc2046#section-5.1.1 ) - # We don't cap the length to 70 characters, because we are just trying to - # soft fix this message to resolve the python library looking for properly - # quoted boundaries. - try: boundary_field = \ - re.search(b"content-type:.*(boundary=[\"]?[A-Za-z0-9'()+_,-./:=? ]+[\"]?)", - re.split(b'[\r]?\n[\r]?\n', raw_msg_bytes)[0], - (re.IGNORECASE|re.DOTALL)).group(1) - except AttributeError: - # No match - return raw_msg_bytes - # get the boundary field, and strip off any trailing ws (against RFC rules, leading ws is OK) - # if it was already quoted, well then there was nothing to fix - boundary, value = boundary_field.split(b'=', 1) - value = value.rstrip() - # ord(b'"') == 34 - if value[0] == value[-1] == 34: - # Sanity Check - Do not requote if already quoted. - # A quoted boundary was the end goal so return the original - # - # No need to worry about if the original email did something like: - # boundary="ahahah " as the email library will trim the ws for us - return raw_msg_bytes - else: - new_field = b''.join([boundary, b'="', value, b'"']) - return(raw_msg_bytes.replace(boundary_field, new_field, 1)) - - # Interface from BaseFolder def quickchanged(self, statusfolder): """Returns True if the Maildir has changed