Series of *UNTESTED* changes that should move the internal structure of

a message from a string to an email object that is part of the built-in
email library.  This allows emails to be processed as bytes and
re-encoded properly if they are not UTF-8 or ASCII encoded.  Currently
these changes cover the Base, IMAP, and Maildir classes but not the
specialized GMAIL class yet.
This commit is contained in:
Joseph Ishac 2021-02-09 14:58:30 -05:00
parent 00d395b746
commit 1d2478bcb6
4 changed files with 111 additions and 202 deletions

View File

@ -22,6 +22,11 @@ import re
import time
from sys import exc_info
from email import policy
from email.parser import BytesParser
from email.generator import BytesGenerator
from email.utils import parsedate_tz, mktime_tz
from offlineimap import threadutil
from offlineimap.ui import getglobalui
from offlineimap.error import OfflineImapError
@ -42,6 +47,22 @@ class BaseFolder:
self.ui = getglobalui()
self.messagelist = {}
# Use the built-in email libraries
# Establish some policies
self.policy = {
'7bit':
policy.default.clone(cte_type='7bit',utf8=False,refold_source='none'),
'7bit-RFC':
policy.default.clone(cte_type='7bit',utf8=False,refold_source='none',linesep='\r\n'),
'8bit':
policy.default.clone(cte_type='8bit',utf8=True,refold_source='none'),
'8bit-RFC':
policy.default.clone(cte_type='8bit',utf8=True,refold_source='none',linesep='\r\n'),
}
# Parsers
self.parse = {
'8bit': BytesParser(policy=p1),
}
# Save original name for folderfilter operations.
self.ffilter_name = name
# Top level dir name is always ''.
@ -466,7 +487,7 @@ class BaseFolder:
except:
raise IOError("Can't read %s" % uidfile)
def savemessage(self, uid, content, flags, rtime):
def savemessage(self, uid, msg, flags, rtime):
"""Writes a new message, with the specified uid.
If the uid is < 0: The backend should assign a new uid and
@ -637,211 +658,90 @@ class BaseFolder:
for uid in uidlist:
self.deletemessagelabels(uid, labels)
def addmessageheader(self, content, linebreak, headername, headervalue):
def addmessageheader(self, msg, headername, headervalue):
"""Adds new header to the provided message.
WARNING: This function is a bit tricky, and modifying it in the wrong
way, may easily lead to data-loss.
Arguments:
- content: message content, headers and body as a single string
- linebreak: string that carries line ending
- msg: message itself
- headername: name of the header to add
- headervalue: value of the header to add
.. note::
Returns: None
The following documentation will not get displayed correctly after
being processed by Sphinx. View the source of this method to read it.
This has to deal with strange corner cases where the header is
missing or empty. Here are illustrations for all the cases,
showing where the header gets inserted and what the end result
is. In each illustration, '+' means the added contents. Note
that these examples assume LF for linebreak, not CRLF, so '\n'
denotes a linebreak and '\n\n' corresponds to the transition
between header and body. However if the linebreak parameter
is set to '\r\n' then you would have to substitute '\r\n' for
'\n' in the below examples.
* Case 1: No '\n\n', leading '\n'
+X-Flying-Pig-Header: i am here\n
\n
This is the body\n
next line\n
* Case 2: '\n\n' at position 0
+X-Flying-Pig-Header: i am here
\n
\n
This is the body\n
next line\n
* Case 3: No '\n\n', no leading '\n'
+X-Flying-Pig-Header: i am here\n
+\n
This is the body\n
next line\n
* Case 4: '\n\n' at non-zero position
Subject: Something wrong with OI\n
From: some@person.at
+\nX-Flying-Pig-Header: i am here
\n
\n
This is the body\n
next line\n
"""
self.ui.debug('', 'addmessageheader: called to add %s: %s' %
(headername, headervalue))
insertionpoint = content.find(linebreak * 2)
if insertionpoint == -1:
self.ui.debug('', 'addmessageheader: headers were missing')
else:
self.ui.debug('',
'addmessageheader: headers end at position %d' %
insertionpoint)
mark = '==>EOH<=='
contextstart = max(0, insertionpoint - 100)
contextend = min(len(content), insertionpoint + 100)
self.ui.debug('', 'addmessageheader: header/body transition " \
"context (marked by %s): %s%s%s' % (
mark, repr(content[contextstart:insertionpoint]),
mark, repr(content[insertionpoint:contextend])
)
)
msg.add_header(headername,headervalue)
return
# Hoping for case #4.
prefix = linebreak
suffix = ''
# Case #2.
if insertionpoint == 0:
prefix = ''
suffix = ''
# Either case #1 or #3.
elif insertionpoint == -1:
prefix = ''
suffix = linebreak
insertionpoint = 0
# Case #3: when body starts immediately, without preceding '\n'
# (this shouldn't happen with proper mail messages, but
# we seen many broken ones), we should add '\n' to make
# new (and the only header, in this case) to be properly
# separated from the message body.
if content[0:len(linebreak)] != linebreak:
suffix = suffix + linebreak
self.ui.debug('',
'addmessageheader: insertionpoint = %d' % insertionpoint)
headers = content[0:insertionpoint]
self.ui.debug('',
'addmessageheader: headers = %s' % repr(headers))
new_header = prefix + ("%s: %s" % (headername, headervalue)) + suffix
self.ui.debug('',
'addmessageheader: new_header = %s' % repr(new_header))
return headers + new_header + content[insertionpoint:]
def __find_eoh(self, content):
"""Searches for the point where mail headers end.
Either double '\n', or end of string.
Arguments:
- content: contents of the message to search in
Returns: position of the first non-header byte.
"""
eoh_cr = content.find('\n\n')
if eoh_cr == -1:
eoh_cr = len(content)
return eoh_cr
def getmessageheader(self, content, name):
"""Return the value of the first occurence of the given header.
def getmessageheader(self, msg, headername):
"""Return the value of an undefined occurence of the given header.
Header name is case-insensitive.
Arguments:
- contents: message itself
- name: name of the header to be searched
- msg: message itself
- headername: name of the header to be searched
Returns: header value or None if no such header was found.
"""
self.ui.debug('', 'getmessageheader: called to get %s' % name)
eoh = self.__find_eoh(content)
self.ui.debug('', 'getmessageheader: eoh = %d' % eoh)
headers = content[0:eoh]
self.ui.debug('', 'getmessageheader: headers = %s' % repr(headers))
return msg.get(headername)
m = re.search('^%s:(.*)$' % name, headers,
flags=re.MULTILINE | re.IGNORECASE)
if m:
return m.group(1).strip()
else:
return None
def getmessageheaderlist(self, content, name):
def getmessageheaderlist(self, msg, headername):
"""Return a list of values for the given header.
Header name is case-insensitive.
Arguments:
- contents: message itself
- name: name of the header to be searched
- msg: message itself
- headername: name of the header to be searched
Returns: list of header values or empty list if no such header was
found.
"""
self.ui.debug('', 'getmessageheaderlist: called to get %s' % name)
eoh = self.__find_eoh(content)
self.ui.debug('', 'getmessageheaderlist: eoh = %d' % eoh)
headers = content[0:eoh]
self.ui.debug('', 'getmessageheaderlist: headers = %s' % repr(headers))
return msg.get_all(headername,[])
return re.findall('^%s:(.*)$' %
name, headers, flags=re.MULTILINE | re.IGNORECASE)
def deletemessageheaders(self, content, header_list):
"""Deletes headers in the given list from the message content.
def deletemessageheaders(self, msg, header_list):
"""Deletes headers in the given list from the message.
Arguments:
- content: message itself
- msg: message itself
- header_list: list of headers to be deleted or just the header name
We expect our message to have '\n' as line endings."""
"""
if type(header_list) != type([]):
header_list = [header_list]
self.ui.debug('',
'deletemessageheaders: called to delete %s' % header_list)
if not len(header_list):
return content
for h in header_list:
del msg[h]
eoh = self.__find_eoh(content)
self.ui.debug('', 'deletemessageheaders: end of headers = %d' % eoh)
headers = content[0:eoh]
rest = content[eoh:]
self.ui.debug('', 'deletemessageheaders: headers = %s' % repr(headers))
new_headers = []
for h in headers.split('\n'):
keep_it = True
for trim_h in header_list:
if len(h) > len(trim_h) \
and h[0:len(trim_h) + 1] == (trim_h + ":"):
keep_it = False
break
if keep_it:
new_headers.append(h)
return
return '\n'.join(new_headers) + rest
def get_message_date(self, msg, header="Date"):
    """Return the Unix timestamp of the email message, derived from the
    Date field header by default.

    Arguments:
    - msg: message itself; only its mapping-style ``get`` is used
    - header: name of the header to extract the date from

    Returns: timestamp, or `None` if the header is missing, is not a
    parseable date, or describes a date that cannot be converted to a
    timestamp.
    """
    raw_date = msg.get(header)
    if raw_date is None:
        # Header not present at all.
        return None

    # parsedate_tz() returns a 10-tuple that can be passed to mktime_tz(),
    # or None when the value is not in a recognisable date format.
    datetuple = parsedate_tz(raw_date)
    if datetuple is None:
        return None

    try:
        return mktime_tz(datetuple)
    except (ValueError, OverflowError):
        # Syntactically valid but out-of-range dates (for example a
        # five-digit year) make the conversion itself fail.  Report that
        # as "no usable date", as the documented contract promises,
        # instead of letting the exception escape to the caller.
        return None
def change_message_uid(self, uid, new_uid):
"""Change the message from existing uid to new_uid.

View File

@ -375,7 +375,7 @@ class IMAPFolder(BaseFolder):
def getmessagekeywords(self, uid):
return self.messagelist[uid]['keywords']
def __generate_randomheader(self, content):
def __generate_randomheader(self, msg, policy=None):
"""Returns a unique X-OfflineIMAP header
Generate an 'X-OfflineIMAP' mail header which contains a random
@ -390,6 +390,10 @@ class IMAPFolder(BaseFolder):
"""
headername = 'X-OfflineIMAP'
if policy is None:
output_policy = self.policy['8bit-RFC']
else:
output_policy = policy
# We need a random component too. If we ever upload the same
# mail twice (e.g. in different folders), we would still need to
# get the UID for the correct one. As we won't have too many
@ -398,9 +402,9 @@ class IMAPFolder(BaseFolder):
# Compute unsigned crc32 of 'content' as unique hash.
# NB: crc32 returns unsigned only starting with python 3.0.
headervalue = str(binascii.crc32(str.encode(content))
& 0xffffffff) + '-'
headervalue += str(self.randomgenerator.randint(0, 9999999999))
headervalue = '{}-{}'.format(
(binascii.crc32(msg.as_bytes(policy=output_policy)) & 0xffffffff),
self.randomgenerator.randint(0, 9999999999))
return headername, headervalue
def __savemessage_searchforheader(self, imapobj, headername, headervalue):
@ -539,7 +543,7 @@ class IMAPFolder(BaseFolder):
return 0
def __getmessageinternaldate(self, content, rtime=None):
def __getmessageinternaldate(self, msg, rtime=None):
"""Parses mail and returns an INTERNALDATE string
It will use information in the following order, falling back as an
@ -571,7 +575,7 @@ class IMAPFolder(BaseFolder):
(which is fine as value for append)."""
if rtime is None:
rtime = emailutil.get_message_date(content)
rtime = self.get_message_date(msg)
if rtime is None:
return None
datetuple = time.localtime(rtime)
@ -619,7 +623,7 @@ class IMAPFolder(BaseFolder):
return internaldate
# Interface from BaseFolder
def savemessage(self, uid, content, flags, rtime):
def savemessage(self, uid, msg, flags, rtime):
"""Save the message on the Server
This backend always assigns a new uid, so the uid arg is ignored.
@ -632,7 +636,7 @@ class IMAPFolder(BaseFolder):
savemessage is never called in a dryrun mode.
:param uid: Message UID
:param content: Message content
:param msg: Message Object
:param flags: Message flags
:param rtime: A timestamp to be used as the mail date
:returns: the UID of the new message as assigned by the server. If the
@ -647,16 +651,19 @@ class IMAPFolder(BaseFolder):
self.savemessageflags(uid, flags)
return uid
content = self.deletemessageheaders(content, self.filterheaders)
# Filter user requested headers before uploading to the IMAP server
self.deletemessageheaders(msg, self.filterheaders)
# Use proper CRLF all over the message.
content = re.sub("(?<!\r)\n", CRLF, content)
# Should just be able to set the policy
output_policy = self.policy['8bit-RFC']
# # Use proper CRLF all over the message.
# content = re.sub("(?<!\r)\n", CRLF, content)
# Get the date of the message, so we can pass it to the server.
date = self.__getmessageinternaldate(content, rtime)
date = self.__getmessageinternaldate(msg, rtime)
# Message-ID is handy for debugging messages.
msg_id = self.getmessageheader(content, "message-id")
msg_id = self.getmessageheader(msg, "message-id")
if not msg_id:
msg_id = '[unknown message-id]'
@ -676,16 +683,16 @@ class IMAPFolder(BaseFolder):
if not use_uidplus:
# Insert a random unique header that we can fetch later.
(headername, headervalue) = self.__generate_randomheader(
content)
msg)
self.ui.debug('imap', 'savemessage: header is: %s: %s' %
(headername, headervalue))
content = self.addmessageheader(content, CRLF,
headername, headervalue)
self.addmessageheader(msg, headername, headervalue)
msg_s = msg.as_string(policy=output_policy)
if len(content) > 200:
dbg_output = "%s...%s" % (content[:150], content[-50:])
dbg_output = "%s...%s" % (msg_s[:150], msg_s[-50:])
else:
dbg_output = content
dbg_output = msg_s
self.ui.debug('imap', "savemessage: date: %s, content: '%s'" %
(date, dbg_output))
@ -695,7 +702,7 @@ class IMAPFolder(BaseFolder):
except imapobj.readonly:
# readonly exception. Return original uid to notify that
# we did not save the message. (see savemessage in Base.py)
self.ui.msgtoreadonly(self, uid, content, flags)
self.ui.msgtoreadonly(self, uid)
return uid
# Do the APPEND.
@ -703,7 +710,7 @@ class IMAPFolder(BaseFolder):
(typ, dat) = imapobj.append(
self.getfullIMAPname(),
imaputil.flagsmaildir2imap(flags),
date, bytes(content, 'utf-8'))
date, msg.as_bytes(policy=output_policy))
# This should only catch 'NO' responses since append()
# will raise an exception for 'BAD' responses:
if typ != 'OK':
@ -716,12 +723,12 @@ class IMAPFolder(BaseFolder):
# In this case, we should immediately abort
# the repository sync and continue
# with the next account.
msg = \
err_msg = \
"Saving msg (%s) in folder '%s', " \
"repository '%s' failed (abort). " \
"Server responded: %s %s\n" % \
(msg_id, self, self.getrepository(), typ, dat)
raise OfflineImapError(msg, OfflineImapError.ERROR.REPO)
raise OfflineImapError(err_msg, OfflineImapError.ERROR.REPO)
retry_left = 0 # Mark as success.
except imapobj.abort as e:
# Connection has been reset, release connection and retry.

View File

@ -258,12 +258,10 @@ class MaildirFolder(BaseFolder):
filename = self.messagelist[uid]['filename']
filepath = os.path.join(self.getfullname(), filename)
file = open(filepath, 'rt')
retval = file.read()
file.close()
# TODO: WHY are we replacing \r\n with \n here? And why do we
# read it as text?
return retval.replace("\r\n", "\n")
fd = open(filepath, 'rb')
retval = self.parse['8bit'](fd)
fd.close()
return retval
# Interface from BaseFolder
def getmessagetime(self, uid):
@ -288,17 +286,21 @@ class MaildirFolder(BaseFolder):
uid, self._foldermd5, self.infosep, ''.join(sorted(flags)))
return uniq_name.replace(os.path.sep, self.sep_subst)
def save_to_tmp_file(self, filename, content):
def save_to_tmp_file(self, filename, msg, policy=None):
"""Saves given content to the named temporary file in the
'tmp' subdirectory of $CWD.
Arguments:
- filename: name of the temporary file;
- content: data to be saved.
- msg: Email message object
Returns: relative path to the temporary file
that was created."""
if policy is None:
output_policy = self.policy['8bit']
else:
output_policy = policy
tmpname = os.path.join('tmp', filename)
# Open file and write it out.
# XXX: why do we need to loop 7 times?
@ -324,8 +326,8 @@ class MaildirFolder(BaseFolder):
else:
raise
fd = os.fdopen(fd, 'wt')
fd.write(content)
fd = os.fdopen(fd, 'wb')
fd.write(msg.as_bytes(policy=output_policy))
# Make sure the data hits the disk.
fd.flush()
if self.dofsync():
@ -335,7 +337,7 @@ class MaildirFolder(BaseFolder):
return tmpname
# Interface from BaseFolder
def savemessage(self, uid, content, flags, rtime):
def savemessage(self, uid, msg, flags, rtime):
"""Writes a new message, with the specified uid.
See folder/Base for detail. Note that savemessage() does not
@ -359,15 +361,15 @@ class MaildirFolder(BaseFolder):
message_timestamp = None
if self._filename_use_mail_timestamp is not False:
try:
message_timestamp = emailutil.get_message_date(content, 'Date')
message_timestamp = self.get_message_date(msg, 'Date')
if message_timestamp is None:
# Give a try with Delivery-date
message_timestamp = emailutil.get_message_date(
content, 'Delivery-date')
message_timestamp = self.get_message_date(
msg, 'Delivery-date')
except Exception as e:
# This should never happen.
from offlineimap.ui import getglobalui
datestr = emailutil.get_message_date(content)
datestr = self.get_message_date(msg)
ui = getglobalui()
ui.warn("UID %d has invalid date %s: %s\n"
"Not using message timestamp as file prefix" %
@ -375,11 +377,11 @@ class MaildirFolder(BaseFolder):
# No need to check if message_timestamp is None here since it
# would be overridden by _gettimeseq.
messagename = self.new_message_filename(uid, flags, date=message_timestamp)
tmpname = self.save_to_tmp_file(messagename, content)
tmpname = self.save_to_tmp_file(messagename, msg)
if self._utime_from_header is True:
try:
date = emailutil.get_message_date(content, 'Date')
date = self.get_message_date(msg, 'Date')
if date is not None:
os.utime(os.path.join(self.getfullname(), tmpname),
(date, date))
@ -387,7 +389,7 @@ class MaildirFolder(BaseFolder):
# int32.
except Exception as e:
from offlineimap.ui import getglobalui
datestr = emailutil.get_message_date(content)
datestr = self.get_message_date(msg)
ui = getglobalui()
ui.warn("UID %d has invalid date %s: %s\n"
"Not changing file modification time" % (uid, datestr, e))

View File

@ -266,7 +266,7 @@ class UIBase:
(self.getnicename(x), x.getname()) for x in folder_list])
# WARNINGS
def msgtoreadonly(self, destfolder, uid, content, flags):
def msgtoreadonly(self, destfolder, uid):
if self.config.has_option('general', 'ignore-readonly') and \
self.config.getboolean('general', 'ignore-readonly'):
return