Series of *UNTESTED* changes that should move the internal structure of

a message from a string to an email object that is part of the built-in
email library.  This allows for emails to be processed as bytes and
re-encoded properly if they are not UTF-8 or ASCII encoded.  Currently
these changes cover the Base, IMAP, and Maildir classes, but not yet the
specialized Gmail class.
This commit is contained in:
Joseph Ishac 2021-02-09 14:58:30 -05:00
parent 00d395b746
commit 1d2478bcb6
4 changed files with 111 additions and 202 deletions

View File

@ -22,6 +22,11 @@ import re
import time import time
from sys import exc_info from sys import exc_info
from email import policy
from email.parser import BytesParser
from email.generator import BytesGenerator
from email.utils import parsedate_tz, mktime_tz
from offlineimap import threadutil from offlineimap import threadutil
from offlineimap.ui import getglobalui from offlineimap.ui import getglobalui
from offlineimap.error import OfflineImapError from offlineimap.error import OfflineImapError
@ -42,6 +47,22 @@ class BaseFolder:
self.ui = getglobalui() self.ui = getglobalui()
self.messagelist = {} self.messagelist = {}
# Use the built-in email libraries
# Establish some policies
self.policy = {
'7bit':
policy.default.clone(cte_type='7bit',utf8=False,refold_source='none'),
'7bit-RFC':
policy.default.clone(cte_type='7bit',utf8=False,refold_source='none',linesep='\r\n'),
'8bit':
policy.default.clone(cte_type='8bit',utf8=True,refold_source='none'),
'8bit-RFC':
policy.default.clone(cte_type='8bit',utf8=True,refold_source='none',linesep='\r\n'),
}
# Parsers
self.parse = {
'8bit': BytesParser(policy=p1),
}
# Save original name for folderfilter operations. # Save original name for folderfilter operations.
self.ffilter_name = name self.ffilter_name = name
# Top level dir name is always ''. # Top level dir name is always ''.
@ -466,7 +487,7 @@ class BaseFolder:
except: except:
raise IOError("Can't read %s" % uidfile) raise IOError("Can't read %s" % uidfile)
def savemessage(self, uid, content, flags, rtime): def savemessage(self, uid, msg, flags, rtime):
"""Writes a new message, with the specified uid. """Writes a new message, with the specified uid.
If the uid is < 0: The backend should assign a new uid and If the uid is < 0: The backend should assign a new uid and
@ -637,211 +658,90 @@ class BaseFolder:
for uid in uidlist: for uid in uidlist:
self.deletemessagelabels(uid, labels) self.deletemessagelabels(uid, labels)
def addmessageheader(self, content, linebreak, headername, headervalue): def addmessageheader(self, msg, headername, headervalue):
"""Adds new header to the provided message. """Adds new header to the provided message.
WARNING: This function is a bit tricky, and modifying it in the wrong
way, may easily lead to data-loss.
Arguments: Arguments:
- content: message content, headers and body as a single string - msg: message itself
- linebreak: string that carries line ending
- headername: name of the header to add - headername: name of the header to add
- headervalue: value of the header to add - headervalue: value of the header to add
.. note:: Returns: None
The following documentation will not get displayed correctly after
being processed by Sphinx. View the source of this method to read it.
This has to deal with strange corner cases where the header is
missing or empty. Here are illustrations for all the cases,
showing where the header gets inserted and what the end result
is. In each illustration, '+' means the added contents. Note
that these examples assume LF for linebreak, not CRLF, so '\n'
denotes a linebreak and '\n\n' corresponds to the transition
between header and body. However if the linebreak parameter
is set to '\r\n' then you would have to substitute '\r\n' for
'\n' in the below examples.
* Case 1: No '\n\n', leading '\n'
+X-Flying-Pig-Header: i am here\n
\n
This is the body\n
next line\n
* Case 2: '\n\n' at position 0
+X-Flying-Pig-Header: i am here
\n
\n
This is the body\n
next line\n
* Case 3: No '\n\n', no leading '\n'
+X-Flying-Pig-Header: i am here\n
+\n
This is the body\n
next line\n
* Case 4: '\n\n' at non-zero position
Subject: Something wrong with OI\n
From: some@person.at
+\nX-Flying-Pig-Header: i am here
\n
\n
This is the body\n
next line\n
""" """
self.ui.debug('', 'addmessageheader: called to add %s: %s' % self.ui.debug('', 'addmessageheader: called to add %s: %s' %
(headername, headervalue)) (headername, headervalue))
insertionpoint = content.find(linebreak * 2) msg.add_header(headername,headervalue)
if insertionpoint == -1: return
self.ui.debug('', 'addmessageheader: headers were missing')
else:
self.ui.debug('',
'addmessageheader: headers end at position %d' %
insertionpoint)
mark = '==>EOH<=='
contextstart = max(0, insertionpoint - 100)
contextend = min(len(content), insertionpoint + 100)
self.ui.debug('', 'addmessageheader: header/body transition " \
"context (marked by %s): %s%s%s' % (
mark, repr(content[contextstart:insertionpoint]),
mark, repr(content[insertionpoint:contextend])
)
)
# Hoping for case #4. def getmessageheader(self, msg, headername):
prefix = linebreak """Return the value of an undefined occurence of the given header.
suffix = ''
# Case #2.
if insertionpoint == 0:
prefix = ''
suffix = ''
# Either case #1 or #3.
elif insertionpoint == -1:
prefix = ''
suffix = linebreak
insertionpoint = 0
# Case #3: when body starts immediately, without preceding '\n'
# (this shouldn't happen with proper mail messages, but
# we seen many broken ones), we should add '\n' to make
# new (and the only header, in this case) to be properly
# separated from the message body.
if content[0:len(linebreak)] != linebreak:
suffix = suffix + linebreak
self.ui.debug('',
'addmessageheader: insertionpoint = %d' % insertionpoint)
headers = content[0:insertionpoint]
self.ui.debug('',
'addmessageheader: headers = %s' % repr(headers))
new_header = prefix + ("%s: %s" % (headername, headervalue)) + suffix
self.ui.debug('',
'addmessageheader: new_header = %s' % repr(new_header))
return headers + new_header + content[insertionpoint:]
def __find_eoh(self, content):
"""Searches for the point where mail headers end.
Either double '\n', or end of string.
Arguments:
- content: contents of the message to search in
Returns: position of the first non-header byte.
"""
eoh_cr = content.find('\n\n')
if eoh_cr == -1:
eoh_cr = len(content)
return eoh_cr
def getmessageheader(self, content, name):
"""Return the value of the first occurence of the given header.
Header name is case-insensitive. Header name is case-insensitive.
Arguments: Arguments:
- contents: message itself - msg: message itself
- name: name of the header to be searched - headername: name of the header to be searched
Returns: header value or None if no such header was found. Returns: header value or None if no such header was found.
""" """
self.ui.debug('', 'getmessageheader: called to get %s' % name) self.ui.debug('', 'getmessageheader: called to get %s' % name)
eoh = self.__find_eoh(content) return msg.get(headername)
self.ui.debug('', 'getmessageheader: eoh = %d' % eoh)
headers = content[0:eoh]
self.ui.debug('', 'getmessageheader: headers = %s' % repr(headers))
m = re.search('^%s:(.*)$' % name, headers, def getmessageheaderlist(self, msg, headername):
flags=re.MULTILINE | re.IGNORECASE)
if m:
return m.group(1).strip()
else:
return None
def getmessageheaderlist(self, content, name):
"""Return a list of values for the given header. """Return a list of values for the given header.
Header name is case-insensitive.
Arguments: Arguments:
- contents: message itself - msg: message itself
- name: name of the header to be searched - headername: name of the header to be searched
Returns: list of header values or empty list if no such header was Returns: list of header values or empty list if no such header was
found. found.
""" """
self.ui.debug('', 'getmessageheaderlist: called to get %s' % name) self.ui.debug('', 'getmessageheaderlist: called to get %s' % name)
eoh = self.__find_eoh(content) return msg.get_all(headername,[])
self.ui.debug('', 'getmessageheaderlist: eoh = %d' % eoh)
headers = content[0:eoh]
self.ui.debug('', 'getmessageheaderlist: headers = %s' % repr(headers))
return re.findall('^%s:(.*)$' % def deletemessageheaders(self, msg, header_list):
name, headers, flags=re.MULTILINE | re.IGNORECASE) """Deletes headers in the given list from the message.
def deletemessageheaders(self, content, header_list):
"""Deletes headers in the given list from the message content.
Arguments: Arguments:
- content: message itself - msg: message itself
- header_list: list of headers to be deleted or just the header name - header_list: list of headers to be deleted or just the header name
We expect our message to have '\n' as line endings.""" """
if type(header_list) != type([]): if type(header_list) != type([]):
header_list = [header_list] header_list = [header_list]
self.ui.debug('', self.ui.debug('',
'deletemessageheaders: called to delete %s' % header_list) 'deletemessageheaders: called to delete %s' % header_list)
if not len(header_list): for h in header_list:
return content del msg[h]
eoh = self.__find_eoh(content) return
self.ui.debug('', 'deletemessageheaders: end of headers = %d' % eoh)
headers = content[0:eoh]
rest = content[eoh:]
self.ui.debug('', 'deletemessageheaders: headers = %s' % repr(headers))
new_headers = []
for h in headers.split('\n'):
keep_it = True
for trim_h in header_list:
if len(h) > len(trim_h) \
and h[0:len(trim_h) + 1] == (trim_h + ":"):
keep_it = False
break
if keep_it:
new_headers.append(h)
return '\n'.join(new_headers) + rest def get_message_date(self, msg, header="Date"):
"""Returns the Unix timestamp of the email message, derived from the
Date field header by default.
Arguments:
- msg: message itself
- header: headers to extract the date from
Returns: timestamp or `None` in the case of failure.
"""
datetuple = parsedate_tz(msg.get(header))
if datetuple is None:
return None
return mktime_tz(datetuple)
def change_message_uid(self, uid, new_uid): def change_message_uid(self, uid, new_uid):
"""Change the message from existing uid to new_uid. """Change the message from existing uid to new_uid.

View File

@ -375,7 +375,7 @@ class IMAPFolder(BaseFolder):
def getmessagekeywords(self, uid): def getmessagekeywords(self, uid):
return self.messagelist[uid]['keywords'] return self.messagelist[uid]['keywords']
def __generate_randomheader(self, content): def __generate_randomheader(self, msg, policy=None):
"""Returns a unique X-OfflineIMAP header """Returns a unique X-OfflineIMAP header
Generate an 'X-OfflineIMAP' mail header which contains a random Generate an 'X-OfflineIMAP' mail header which contains a random
@ -390,6 +390,10 @@ class IMAPFolder(BaseFolder):
""" """
headername = 'X-OfflineIMAP' headername = 'X-OfflineIMAP'
if policy is None:
output_policy = self.policy['8bit-RFC']
else:
output_policy = policy
# We need a random component too. If we ever upload the same # We need a random component too. If we ever upload the same
# mail twice (e.g. in different folders), we would still need to # mail twice (e.g. in different folders), we would still need to
# get the UID for the correct one. As we won't have too many # get the UID for the correct one. As we won't have too many
@ -398,9 +402,9 @@ class IMAPFolder(BaseFolder):
# Compute unsigned crc32 of 'content' as unique hash. # Compute unsigned crc32 of 'content' as unique hash.
# NB: crc32 returns unsigned only starting with python 3.0. # NB: crc32 returns unsigned only starting with python 3.0.
headervalue = str(binascii.crc32(str.encode(content)) headervalue = '{}-{}'.format(
& 0xffffffff) + '-' (binascii.crc32(msg.as_bytes(policy=output_policy)) & 0xffffffff),
headervalue += str(self.randomgenerator.randint(0, 9999999999)) self.randomgenerator.randint(0, 9999999999))
return headername, headervalue return headername, headervalue
def __savemessage_searchforheader(self, imapobj, headername, headervalue): def __savemessage_searchforheader(self, imapobj, headername, headervalue):
@ -539,7 +543,7 @@ class IMAPFolder(BaseFolder):
return 0 return 0
def __getmessageinternaldate(self, content, rtime=None): def __getmessageinternaldate(self, msg, rtime=None):
"""Parses mail and returns an INTERNALDATE string """Parses mail and returns an INTERNALDATE string
It will use information in the following order, falling back as an It will use information in the following order, falling back as an
@ -571,7 +575,7 @@ class IMAPFolder(BaseFolder):
(which is fine as value for append).""" (which is fine as value for append)."""
if rtime is None: if rtime is None:
rtime = emailutil.get_message_date(content) rtime = self.get_message_date(msg)
if rtime is None: if rtime is None:
return None return None
datetuple = time.localtime(rtime) datetuple = time.localtime(rtime)
@ -619,7 +623,7 @@ class IMAPFolder(BaseFolder):
return internaldate return internaldate
# Interface from BaseFolder # Interface from BaseFolder
def savemessage(self, uid, content, flags, rtime): def savemessage(self, uid, msg, flags, rtime):
"""Save the message on the Server """Save the message on the Server
This backend always assigns a new uid, so the uid arg is ignored. This backend always assigns a new uid, so the uid arg is ignored.
@ -632,7 +636,7 @@ class IMAPFolder(BaseFolder):
savemessage is never called in a dryrun mode. savemessage is never called in a dryrun mode.
:param uid: Message UID :param uid: Message UID
:param content: Message content :param msg: Message Object
:param flags: Message flags :param flags: Message flags
:param rtime: A timestamp to be used as the mail date :param rtime: A timestamp to be used as the mail date
:returns: the UID of the new message as assigned by the server. If the :returns: the UID of the new message as assigned by the server. If the
@ -647,16 +651,19 @@ class IMAPFolder(BaseFolder):
self.savemessageflags(uid, flags) self.savemessageflags(uid, flags)
return uid return uid
content = self.deletemessageheaders(content, self.filterheaders) # Filter user requested headers before uploading to the IMAP server
self.deletemessageheaders(msg, self.filterheaders)
# Use proper CRLF all over the message. # Should just be able to set the policy
content = re.sub("(?<!\r)\n", CRLF, content) output_policy = self.policy['8bit-RFC']
# # Use proper CRLF all over the message.
# content = re.sub("(?<!\r)\n", CRLF, content)
# Get the date of the message, so we can pass it to the server. # Get the date of the message, so we can pass it to the server.
date = self.__getmessageinternaldate(content, rtime) date = self.__getmessageinternaldate(msg, rtime)
# Message-ID is handy for debugging messages. # Message-ID is handy for debugging messages.
msg_id = self.getmessageheader(content, "message-id") msg_id = self.getmessageheader(msg, "message-id")
if not msg_id: if not msg_id:
msg_id = '[unknown message-id]' msg_id = '[unknown message-id]'
@ -676,16 +683,16 @@ class IMAPFolder(BaseFolder):
if not use_uidplus: if not use_uidplus:
# Insert a random unique header that we can fetch later. # Insert a random unique header that we can fetch later.
(headername, headervalue) = self.__generate_randomheader( (headername, headervalue) = self.__generate_randomheader(
content) msg)
self.ui.debug('imap', 'savemessage: header is: %s: %s' % self.ui.debug('imap', 'savemessage: header is: %s: %s' %
(headername, headervalue)) (headername, headervalue))
content = self.addmessageheader(content, CRLF, self.addmessageheader(msg, headername, headervalue)
headername, headervalue)
msg_s = msg.as_string(policy=output_policy)
if len(content) > 200: if len(content) > 200:
dbg_output = "%s...%s" % (content[:150], content[-50:]) dbg_output = "%s...%s" % (msg_s[:150], msg_s[-50:])
else: else:
dbg_output = content dbg_output = msg_s
self.ui.debug('imap', "savemessage: date: %s, content: '%s'" % self.ui.debug('imap', "savemessage: date: %s, content: '%s'" %
(date, dbg_output)) (date, dbg_output))
@ -695,7 +702,7 @@ class IMAPFolder(BaseFolder):
except imapobj.readonly: except imapobj.readonly:
# readonly exception. Return original uid to notify that # readonly exception. Return original uid to notify that
# we did not save the message. (see savemessage in Base.py) # we did not save the message. (see savemessage in Base.py)
self.ui.msgtoreadonly(self, uid, content, flags) self.ui.msgtoreadonly(self, uid)
return uid return uid
# Do the APPEND. # Do the APPEND.
@ -703,7 +710,7 @@ class IMAPFolder(BaseFolder):
(typ, dat) = imapobj.append( (typ, dat) = imapobj.append(
self.getfullIMAPname(), self.getfullIMAPname(),
imaputil.flagsmaildir2imap(flags), imaputil.flagsmaildir2imap(flags),
date, bytes(content, 'utf-8')) date, msg.as_bytes(policy=output_policy))
# This should only catch 'NO' responses since append() # This should only catch 'NO' responses since append()
# will raise an exception for 'BAD' responses: # will raise an exception for 'BAD' responses:
if typ != 'OK': if typ != 'OK':
@ -716,12 +723,12 @@ class IMAPFolder(BaseFolder):
# In this case, we should immediately abort # In this case, we should immediately abort
# the repository sync and continue # the repository sync and continue
# with the next account. # with the next account.
msg = \ err_msg = \
"Saving msg (%s) in folder '%s', " \ "Saving msg (%s) in folder '%s', " \
"repository '%s' failed (abort). " \ "repository '%s' failed (abort). " \
"Server responded: %s %s\n" % \ "Server responded: %s %s\n" % \
(msg_id, self, self.getrepository(), typ, dat) (msg_id, self, self.getrepository(), typ, dat)
raise OfflineImapError(msg, OfflineImapError.ERROR.REPO) raise OfflineImapError(err_msg, OfflineImapError.ERROR.REPO)
retry_left = 0 # Mark as success. retry_left = 0 # Mark as success.
except imapobj.abort as e: except imapobj.abort as e:
# Connection has been reset, release connection and retry. # Connection has been reset, release connection and retry.

View File

@ -258,12 +258,10 @@ class MaildirFolder(BaseFolder):
filename = self.messagelist[uid]['filename'] filename = self.messagelist[uid]['filename']
filepath = os.path.join(self.getfullname(), filename) filepath = os.path.join(self.getfullname(), filename)
file = open(filepath, 'rt') fd = open(filepath, 'rb')
retval = file.read() retval = self.parse['8bit'](fd)
file.close() fd.close()
# TODO: WHY are we replacing \r\n with \n here? And why do we return retval
# read it as text?
return retval.replace("\r\n", "\n")
# Interface from BaseFolder # Interface from BaseFolder
def getmessagetime(self, uid): def getmessagetime(self, uid):
@ -288,17 +286,21 @@ class MaildirFolder(BaseFolder):
uid, self._foldermd5, self.infosep, ''.join(sorted(flags))) uid, self._foldermd5, self.infosep, ''.join(sorted(flags)))
return uniq_name.replace(os.path.sep, self.sep_subst) return uniq_name.replace(os.path.sep, self.sep_subst)
def save_to_tmp_file(self, filename, content): def save_to_tmp_file(self, filename, msg, policy=None):
"""Saves given content to the named temporary file in the """Saves given content to the named temporary file in the
'tmp' subdirectory of $CWD. 'tmp' subdirectory of $CWD.
Arguments: Arguments:
- filename: name of the temporary file; - filename: name of the temporary file;
- content: data to be saved. - msg: Email message object
Returns: relative path to the temporary file Returns: relative path to the temporary file
that was created.""" that was created."""
if policy is None:
output_policy = self.policy['8bit']
else:
output_policy = policy
tmpname = os.path.join('tmp', filename) tmpname = os.path.join('tmp', filename)
# Open file and write it out. # Open file and write it out.
# XXX: why do we need to loop 7 times? # XXX: why do we need to loop 7 times?
@ -324,8 +326,8 @@ class MaildirFolder(BaseFolder):
else: else:
raise raise
fd = os.fdopen(fd, 'wt') fd = os.fdopen(fd, 'wb')
fd.write(content) fd.write(msg.as_bytes(policy=output_policy))
# Make sure the data hits the disk. # Make sure the data hits the disk.
fd.flush() fd.flush()
if self.dofsync(): if self.dofsync():
@ -335,7 +337,7 @@ class MaildirFolder(BaseFolder):
return tmpname return tmpname
# Interface from BaseFolder # Interface from BaseFolder
def savemessage(self, uid, content, flags, rtime): def savemessage(self, uid, msg, flags, rtime):
"""Writes a new message, with the specified uid. """Writes a new message, with the specified uid.
See folder/Base for detail. Note that savemessage() does not See folder/Base for detail. Note that savemessage() does not
@ -359,15 +361,15 @@ class MaildirFolder(BaseFolder):
message_timestamp = None message_timestamp = None
if self._filename_use_mail_timestamp is not False: if self._filename_use_mail_timestamp is not False:
try: try:
message_timestamp = emailutil.get_message_date(content, 'Date') message_timestamp = self.get_message_date(msg, 'Date')
if message_timestamp is None: if message_timestamp is None:
# Give a try with Delivery-date # Give a try with Delivery-date
message_timestamp = emailutil.get_message_date( message_timestamp = self.get_message_date(
content, 'Delivery-date') msg, 'Delivery-date')
except Exception as e: except Exception as e:
# This should never happen. # This should never happen.
from offlineimap.ui import getglobalui from offlineimap.ui import getglobalui
datestr = emailutil.get_message_date(content) datestr = self.get_message_date(msg)
ui = getglobalui() ui = getglobalui()
ui.warn("UID %d has invalid date %s: %s\n" ui.warn("UID %d has invalid date %s: %s\n"
"Not using message timestamp as file prefix" % "Not using message timestamp as file prefix" %
@ -375,11 +377,11 @@ class MaildirFolder(BaseFolder):
# No need to check if message_timestamp is None here since it # No need to check if message_timestamp is None here since it
# would be overridden by _gettimeseq. # would be overridden by _gettimeseq.
messagename = self.new_message_filename(uid, flags, date=message_timestamp) messagename = self.new_message_filename(uid, flags, date=message_timestamp)
tmpname = self.save_to_tmp_file(messagename, content) tmpname = self.save_to_tmp_file(messagename, msg)
if self._utime_from_header is True: if self._utime_from_header is True:
try: try:
date = emailutil.get_message_date(content, 'Date') date = self.get_message_date(msg, 'Date')
if date is not None: if date is not None:
os.utime(os.path.join(self.getfullname(), tmpname), os.utime(os.path.join(self.getfullname(), tmpname),
(date, date)) (date, date))
@ -387,7 +389,7 @@ class MaildirFolder(BaseFolder):
# int32. # int32.
except Exception as e: except Exception as e:
from offlineimap.ui import getglobalui from offlineimap.ui import getglobalui
datestr = emailutil.get_message_date(content) datestr = self.get_message_date(msg)
ui = getglobalui() ui = getglobalui()
ui.warn("UID %d has invalid date %s: %s\n" ui.warn("UID %d has invalid date %s: %s\n"
"Not changing file modification time" % (uid, datestr, e)) "Not changing file modification time" % (uid, datestr, e))

View File

@ -266,7 +266,7 @@ class UIBase:
(self.getnicename(x), x.getname()) for x in folder_list]) (self.getnicename(x), x.getname()) for x in folder_list])
# WARNINGS # WARNINGS
def msgtoreadonly(self, destfolder, uid, content, flags): def msgtoreadonly(self, destfolder, uid):
if self.config.has_option('general', 'ignore-readonly') and \ if self.config.has_option('general', 'ignore-readonly') and \
self.config.getboolean('general', 'ignore-readonly'): self.config.getboolean('general', 'ignore-readonly'):
return return