diff --git a/offlineimap/emailutil.py b/offlineimap/emailutil.py deleted file mode 100644 index ea035f6..0000000 --- a/offlineimap/emailutil.py +++ /dev/null @@ -1,38 +0,0 @@ -# Some useful functions to extract data out of emails -# Copyright (C) 2002-2015 John Goerzen & contributors -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -import email -from email.parser import Parser as MailParser - - -def get_message_date(content, header='Date'): - """Parses mail and returns resulting timestamp. - - :param content: Mail content - :param header: the header to extract date from; - :returns: timestamp or `None` in the case of failure. - """ - - message = MailParser().parsestr(content, True) - dateheader = message.get(header) - # parsedate_tz returns a 10-tuple that can be passed to mktime_tz - # Will be None if missing or not in a valid format. Note that - # indexes 6, 7, and 8 of the result tuple are not usable. 
- datetuple = email.utils.parsedate_tz(dateheader) - if datetuple is None: - return None - return email.utils.mktime_tz(datetuple) diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index 8b98555..f757e75 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -22,6 +22,11 @@ import re import time from sys import exc_info +from email import policy +from email.parser import BytesParser +from email.generator import BytesGenerator +from email.utils import parsedate_tz, mktime_tz + from offlineimap import threadutil from offlineimap.ui import getglobalui from offlineimap.error import OfflineImapError @@ -42,6 +47,22 @@ class BaseFolder: self.ui = getglobalui() self.messagelist = {} + # Use the built-in email libraries + # Establish some policies + self.policy = { + '7bit': + policy.default.clone(cte_type='7bit', utf8=False, refold_source='none'), + '7bit-RFC': + policy.default.clone(cte_type='7bit', utf8=False, refold_source='none', linesep='\r\n'), + '8bit': + policy.default.clone(cte_type='8bit', utf8=True, refold_source='none'), + '8bit-RFC': + policy.default.clone(cte_type='8bit', utf8=True, refold_source='none', linesep='\r\n'), + } + # Parsers + self.parser = {} + for key in self.policy: + self.parser[key] = BytesParser(policy=self.policy[key]) # Save original name for folderfilter operations. self.ffilter_name = name # Top level dir name is always ''. @@ -349,7 +370,7 @@ class BaseFolder: return len(self.getmessagelist()) def getmessage(self, uid): - """Returns the content of the specified message.""" + """Returns an email message object.""" raise NotImplementedError @@ -466,7 +487,7 @@ class BaseFolder: except: raise IOError("Can't read %s" % uidfile) - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Writes a new message, with the specified uid. 
If the uid is < 0: The backend should assign a new uid and @@ -637,211 +658,90 @@ class BaseFolder: for uid in uidlist: self.deletemessagelabels(uid, labels) - def addmessageheader(self, content, linebreak, headername, headervalue): + def addmessageheader(self, msg, headername, headervalue): """Adds new header to the provided message. - WARNING: This function is a bit tricky, and modifying it in the wrong - way, may easily lead to data-loss. - Arguments: - - content: message content, headers and body as a single string - - linebreak: string that carries line ending + - msg: message object - headername: name of the header to add - headervalue: value of the header to add - .. note:: + Returns: None - The following documentation will not get displayed correctly after - being processed by Sphinx. View the source of this method to read it. - - This has to deal with strange corner cases where the header is - missing or empty. Here are illustrations for all the cases, - showing where the header gets inserted and what the end result - is. In each illustration, '+' means the added contents. Note - that these examples assume LF for linebreak, not CRLF, so '\n' - denotes a linebreak and '\n\n' corresponds to the transition - between header and body. However if the linebreak parameter - is set to '\r\n' then you would have to substitute '\r\n' for - '\n' in the below examples. 
- - * Case 1: No '\n\n', leading '\n' - - +X-Flying-Pig-Header: i am here\n - \n - This is the body\n - next line\n - - * Case 2: '\n\n' at position 0 - - +X-Flying-Pig-Header: i am here - \n - \n - This is the body\n - next line\n - - * Case 3: No '\n\n', no leading '\n' - - +X-Flying-Pig-Header: i am here\n - +\n - This is the body\n - next line\n - - * Case 4: '\n\n' at non-zero position - - Subject: Something wrong with OI\n - From: some@person.at - +\nX-Flying-Pig-Header: i am here - \n - \n - This is the body\n - next line\n """ self.ui.debug('', 'addmessageheader: called to add %s: %s' % (headername, headervalue)) - insertionpoint = content.find(linebreak * 2) - if insertionpoint == -1: - self.ui.debug('', 'addmessageheader: headers were missing') - else: - self.ui.debug('', - 'addmessageheader: headers end at position %d' % - insertionpoint) - mark = '==>EOH<==' - contextstart = max(0, insertionpoint - 100) - contextend = min(len(content), insertionpoint + 100) - self.ui.debug('', 'addmessageheader: header/body transition " \ - "context (marked by %s): %s%s%s' % ( - mark, repr(content[contextstart:insertionpoint]), - mark, repr(content[insertionpoint:contextend]) - ) - ) + msg.add_header(headername, headervalue) + return - # Hoping for case #4. - prefix = linebreak - suffix = '' - # Case #2. - if insertionpoint == 0: - prefix = '' - suffix = '' - # Either case #1 or #3. - elif insertionpoint == -1: - prefix = '' - suffix = linebreak - insertionpoint = 0 - # Case #3: when body starts immediately, without preceding '\n' - # (this shouldn't happen with proper mail messages, but - # we seen many broken ones), we should add '\n' to make - # new (and the only header, in this case) to be properly - # separated from the message body. 
- if content[0:len(linebreak)] != linebreak: - suffix = suffix + linebreak - - self.ui.debug('', - 'addmessageheader: insertionpoint = %d' % insertionpoint) - headers = content[0:insertionpoint] - self.ui.debug('', - 'addmessageheader: headers = %s' % repr(headers)) - new_header = prefix + ("%s: %s" % (headername, headervalue)) + suffix - self.ui.debug('', - 'addmessageheader: new_header = %s' % repr(new_header)) - return headers + new_header + content[insertionpoint:] - - def __find_eoh(self, content): - """Searches for the point where mail headers end. - - Either double '\n', or end of string. - - Arguments: - - content: contents of the message to search in - Returns: position of the first non-header byte. - """ - - eoh_cr = content.find('\n\n') - if eoh_cr == -1: - eoh_cr = len(content) - - return eoh_cr - - def getmessageheader(self, content, name): - """Return the value of the first occurence of the given header. + def getmessageheader(self, msg, headername): + """Return the value of an undefined occurrence of the given header. Header name is case-insensitive. Arguments: - - contents: message itself - - name: name of the header to be searched + - msg: message object + - headername: name of the header to be searched Returns: header value or None if no such header was found. """ - self.ui.debug('', 'getmessageheader: called to get %s' % name) - eoh = self.__find_eoh(content) - self.ui.debug('', 'getmessageheader: eoh = %d' % eoh) - headers = content[0:eoh] - self.ui.debug('', 'getmessageheader: headers = %s' % repr(headers)) + self.ui.debug('', 'getmessageheader: called to get %s' % headername) + return msg.get(headername) - m = re.search('^%s:(.*)$' % name, headers, - flags=re.MULTILINE | re.IGNORECASE) - if m: - return m.group(1).strip() - else: - return None - - def getmessageheaderlist(self, content, name): + def getmessageheaderlist(self, msg, headername): """Return a list of values for the given header. + Header name is case-insensitive. 
+ Arguments: - - contents: message itself - - name: name of the header to be searched + - msg: message object + - headername: name of the header to be searched Returns: list of header values or empty list if no such header was found. """ - self.ui.debug('', 'getmessageheaderlist: called to get %s' % name) - eoh = self.__find_eoh(content) - self.ui.debug('', 'getmessageheaderlist: eoh = %d' % eoh) - headers = content[0:eoh] - self.ui.debug('', 'getmessageheaderlist: headers = %s' % repr(headers)) + self.ui.debug('', 'getmessageheaderlist: called to get %s' % headername) + return msg.get_all(headername, []) - return re.findall('^%s:(.*)$' % - name, headers, flags=re.MULTILINE | re.IGNORECASE) - - def deletemessageheaders(self, content, header_list): - """Deletes headers in the given list from the message content. + def deletemessageheaders(self, msg, header_list): + """Deletes headers in the given list from the message. Arguments: - - content: message itself + - msg: message object - header_list: list of headers to be deleted or just the header name - We expect our message to have '\n' as line endings.""" + """ if type(header_list) != type([]): header_list = [header_list] self.ui.debug('', 'deletemessageheaders: called to delete %s' % header_list) - if not len(header_list): - return content + for h in header_list: + del msg[h] - eoh = self.__find_eoh(content) - self.ui.debug('', 'deletemessageheaders: end of headers = %d' % eoh) - headers = content[0:eoh] - rest = content[eoh:] - self.ui.debug('', 'deletemessageheaders: headers = %s' % repr(headers)) - new_headers = [] - for h in headers.split('\n'): - keep_it = True - for trim_h in header_list: - if len(h) > len(trim_h) \ - and h[0:len(trim_h) + 1] == (trim_h + ":"): - keep_it = False - break - if keep_it: - new_headers.append(h) + return - return '\n'.join(new_headers) + rest + def get_message_date(self, msg, header="Date"): + """Returns the Unix timestamp of the email message, derived from the + Date field header 
by default. + + Arguments: + - msg: message object + - header: header to extract the date from + + Returns: timestamp or `None` in the case of failure. + """ + + datetuple = parsedate_tz(msg.get(header)) + if datetuple is None: + return None + + return mktime_tz(datetuple) def change_message_uid(self, uid, new_uid): """Change the message from existing uid to new_uid. diff --git a/offlineimap/folder/Gmail.py b/offlineimap/folder/Gmail.py index af236e1..d047cf2 100644 --- a/offlineimap/folder/Gmail.py +++ b/offlineimap/folder/Gmail.py @@ -69,14 +69,13 @@ class GmailFolder(IMAPFolder): data = self._fetch_from_imap(str(uid), self.retrycount) # data looks now e.g. - # ['320 (X-GM-LABELS (...) UID 17061 BODY[] {2565}','msgbody....'] + # ['320 (X-GM-LABELS (...) UID 17061 BODY[] {2565}', EmailMessage(...)] # we only asked for one message, and that msg is in data[1]. - # msbody is in [1]. - body = data[1].replace("\r\n", "\n") + msg = data[1] # Embed the labels into the message headers if self.synclabels: - m = re.search('X-GM-LABELS\s*[(](.*)[)]', data[0][0]) + m = re.search('X-GM-LABELS\s*[(](.*)[)]', data[0]) if m: labels = set([imaputil.dequote(lb) for lb in imaputil.imapsplit(m.group(1))]) else: @@ -84,19 +83,23 @@ class GmailFolder(IMAPFolder): labels = labels - self.ignorelabels labels_str = imaputil.format_labels_string(self.labelsheader, sorted(labels)) - # First remove old label headers that may be in the message content retrieved + # First remove old label headers that may be in the message body retrieved # from gmail Then add a labels header with current gmail labels. 
- body = self.deletemessageheaders(body, self.labelsheader) - body = self.addmessageheader(body, '\n', self.labelsheader, labels_str) + self.deletemessageheaders(msg, self.labelsheader) + self.addmessageheader(msg, self.labelsheader, labels_str) - if len(body) > 200: - dbg_output = "%s...%s" % (str(body)[:150], str(body)[-50:]) - else: - dbg_output = body + if self.ui.is_debugging('imap'): + # Optimization: don't create the debugging objects unless needed + msg_s = msg.as_string(policy=self.policy['8bit-RFC']) + if len(msg_s) > 200: + dbg_output = "%s...%s" % (msg_s[:150], msg_s[-50:]) + else: + dbg_output = msg_s - self.ui.debug('imap', "Returned object from fetching %d: '%s'" % - (uid, dbg_output)) - return body + self.ui.debug('imap', "Returned object from fetching %d: '%s'" % + (uid, dbg_output)) + + return msg def getmessagelabels(self, uid): if 'labels' in self.messagelist[uid]: @@ -167,7 +170,7 @@ class GmailFolder(IMAPFolder): rtime = imaplibutil.Internaldate2epoch(messagestr) self.messagelist[uid] = {'uid': uid, 'flags': flags, 'labels': labels, 'time': rtime} - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Save the message on the Server This backend always assigns a new uid, so the uid arg is ignored. @@ -180,7 +183,7 @@ class GmailFolder(IMAPFolder): savemessage is never called in a dryrun mode. :param uid: Message UID - :param content: Message content + :param msg: Message object :param flags: Message flags :param rtime: A timestamp to be used as the mail date :returns: the UID of the new message as assigned by the server. 
If the @@ -189,13 +192,13 @@ class GmailFolder(IMAPFolder): read-only for example) it will return -1.""" if not self.synclabels: - return super(GmailFolder, self).savemessage(uid, content, flags, rtime) + return super(GmailFolder, self).savemessage(uid, msg, flags, rtime) labels = set() - for hstr in self.getmessageheaderlist(content, self.labelsheader): + for hstr in self.getmessageheaderlist(msg, self.labelsheader): labels.update(imaputil.labels_from_header(self.labelsheader, hstr)) - ret = super(GmailFolder, self).savemessage(uid, content, flags, rtime) + ret = super(GmailFolder, self).savemessage(uid, msg, flags, rtime) self.savemessagelabels(ret, labels) return ret diff --git a/offlineimap/folder/GmailMaildir.py b/offlineimap/folder/GmailMaildir.py index ebefd1b..1dfccc7 100644 --- a/offlineimap/folder/GmailMaildir.py +++ b/offlineimap/folder/GmailMaildir.py @@ -90,12 +90,12 @@ class GmailMaildirFolder(MaildirFolder): if not os.path.exists(filepath): return set() - file = open(filepath, 'rt') - content = file.read() - file.close() + fd = open(filepath, 'rb') + msg = self.parser['8bit'].parse(fd) + fd.close() self.messagelist[uid]['labels'] = set() - for hstr in self.getmessageheaderlist(content, self.labelsheader): + for hstr in self.getmessageheaderlist(msg, self.labelsheader): self.messagelist[uid]['labels'].update( imaputil.labels_from_header(self.labelsheader, hstr)) self.messagelist[uid]['labels_cached'] = True @@ -108,7 +108,7 @@ class GmailMaildirFolder(MaildirFolder): else: return self.messagelist[uid]['mtime'] - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Writes a new message, with the specified uid. See folder/Base for detail. 
Note that savemessage() does not @@ -116,14 +116,15 @@ class GmailMaildirFolder(MaildirFolder): savemessage is never called in a dryrun mode.""" if not self.synclabels: - return super(GmailMaildirFolder, self).savemessage(uid, content, + return super(GmailMaildirFolder, self).savemessage(uid, msg, flags, rtime) labels = set() - for hstr in self.getmessageheaderlist(content, self.labelsheader): + for hstr in self.getmessageheaderlist(msg, self.labelsheader): labels.update(imaputil.labels_from_header(self.labelsheader, hstr)) - ret = super(GmailMaildirFolder, self).savemessage(uid, content, flags, + # TODO - Not sure why the returned uid is stored early as ret here? + ret = super(GmailMaildirFolder, self).savemessage(uid, msg, flags, rtime) # Update the mtime and labels. @@ -145,12 +146,12 @@ class GmailMaildirFolder(MaildirFolder): filename = self.messagelist[uid]['filename'] filepath = os.path.join(self.getfullname(), filename) - file = open(filepath, 'rt') - content = file.read() - file.close() + fd = open(filepath, 'rb') + msg = self.parser['8bit'].parse(fd) + fd.close() oldlabels = set() - for hstr in self.getmessageheaderlist(content, self.labelsheader): + for hstr in self.getmessageheaderlist(msg, self.labelsheader): oldlabels.update(imaputil.labels_from_header(self.labelsheader, hstr)) @@ -167,15 +168,14 @@ class GmailMaildirFolder(MaildirFolder): sorted(labels | ignoredlabels)) # First remove old labels header, and then add the new one. - content = self.deletemessageheaders(content, self.labelsheader) - content = self.addmessageheader(content, '\n', self.labelsheader, - labels_str) + self.deletemessageheaders(msg, self.labelsheader) + self.addmessageheader(msg, self.labelsheader, labels_str) mtime = int(os.stat(filepath).st_mtime) # Write file with new labels to a unique file in tmp. 
messagename = self.new_message_filename(uid, set()) - tmpname = self.save_to_tmp_file(messagename, content) + tmpname = self.save_to_tmp_file(messagename, msg) tmppath = os.path.join(self.getfullname(), tmpname) # Move to actual location. diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index f08361b..16d65da 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -20,7 +20,7 @@ import binascii import re import time from sys import exc_info -from offlineimap import imaputil, imaplibutil, emailutil, OfflineImapError +from offlineimap import imaputil, imaplibutil, OfflineImapError from offlineimap import globals from imaplib2 import MonthNames from .Base import BaseFolder @@ -30,13 +30,6 @@ CRLF = '\r\n' MSGCOPY_NAMESPACE = 'MSGCOPY_' -# NB: message returned from getmessage() will have '\n' all over the place, -# NB: there will be no CRLFs. Just before the sending stage of savemessage() -# NB: '\n' will be transformed back to CRLF. So, for the most parts of the -# NB: code the stored content will be clean of CRLF and one can rely that -# NB: line endings will be pure '\n'. - - class IMAPFolder(BaseFolder): def __init__(self, imapserver, name, repository, decode=True): # decode the folder name from IMAP4_utf_7 to utf_8 if @@ -349,19 +342,22 @@ class IMAPFolder(BaseFolder): data = self._fetch_from_imap(str(uid), self.retrycount) # Data looks now e.g. - # ['320 (17061 BODY[] {2565}','msgbody....'] + # ['320 (17061 BODY[] {2565}', EmailMessage(...)] # Is a list of two elements. 
Message is at [1] - data = data[1].replace(CRLF, "\n") + msg = data[1] - if len(data) > 200: - dbg_output = "%s...%s" % (str(data)[:150], str(data)[-50:]) - else: - dbg_output = data + if self.ui.is_debugging('imap'): + # Optimization: don't create the debugging objects unless needed + msg_s = msg.as_string(policy=self.policy['8bit-RFC']) + if len(msg_s) > 200: + dbg_output = "%s...%s" % (msg_s[:150], msg_s[-50:]) + else: + dbg_output = msg_s - self.ui.debug('imap', "Returned object from fetching %d: '%s'" % - (uid, dbg_output)) + self.ui.debug('imap', "Returned object from fetching %d: '%s'" % + (uid, dbg_output)) - return data + return msg # Interface from BaseFolder def getmessagetime(self, uid): @@ -375,7 +371,7 @@ class IMAPFolder(BaseFolder): def getmessagekeywords(self, uid): return self.messagelist[uid]['keywords'] - def __generate_randomheader(self, content): + def __generate_randomheader(self, msg, policy=None): """Returns a unique X-OfflineIMAP header Generate an 'X-OfflineIMAP' mail header which contains a random @@ -390,17 +386,21 @@ class IMAPFolder(BaseFolder): """ headername = 'X-OfflineIMAP' + if policy is None: + output_policy = self.policy['8bit-RFC'] + else: + output_policy = policy # We need a random component too. If we ever upload the same # mail twice (e.g. in different folders), we would still need to # get the UID for the correct one. As we won't have too many # mails with identical content, the randomness requirements are # not extremly critial though. - # Compute unsigned crc32 of 'content' as unique hash. + # Compute unsigned crc32 of 'msg' (as bytes) into a unique hash. # NB: crc32 returns unsigned only starting with python 3.0. 
- headervalue = str(binascii.crc32(str.encode(content)) - & 0xffffffff) + '-' - headervalue += str(self.randomgenerator.randint(0, 9999999999)) + headervalue = '{}-{}'.format( + (binascii.crc32(msg.as_bytes(policy=output_policy)) & 0xffffffff), + self.randomgenerator.randint(0, 9999999999)) return headername, headervalue def __savemessage_searchforheader(self, imapobj, headername, headervalue): @@ -539,7 +539,7 @@ class IMAPFolder(BaseFolder): return 0 - def __getmessageinternaldate(self, content, rtime=None): + def __getmessageinternaldate(self, msg, rtime=None): """Parses mail and returns an INTERNALDATE string It will use information in the following order, falling back as an @@ -571,7 +571,7 @@ class IMAPFolder(BaseFolder): (which is fine as value for append).""" if rtime is None: - rtime = emailutil.get_message_date(content) + rtime = self.get_message_date(msg) if rtime is None: return None datetuple = time.localtime(rtime) @@ -619,7 +619,7 @@ class IMAPFolder(BaseFolder): return internaldate # Interface from BaseFolder - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Save the message on the Server This backend always assigns a new uid, so the uid arg is ignored. @@ -632,7 +632,7 @@ class IMAPFolder(BaseFolder): savemessage is never called in a dryrun mode. :param uid: Message UID - :param content: Message content + :param msg: Message Object :param flags: Message flags :param rtime: A timestamp to be used as the mail date :returns: the UID of the new message as assigned by the server. If the @@ -647,16 +647,17 @@ class IMAPFolder(BaseFolder): self.savemessageflags(uid, flags) return uid - content = self.deletemessageheaders(content, self.filterheaders) + # Filter user requested headers before uploading to the IMAP server + self.deletemessageheaders(msg, self.filterheaders) - # Use proper CRLF all over the message. - content = re.sub("(? 
200: - dbg_output = "%s...%s" % (content[:150], content[-50:]) - else: - dbg_output = content - self.ui.debug('imap', "savemessage: date: %s, content: '%s'" % - (date, dbg_output)) + if self.ui.is_debugging('imap'): + # Optimization: don't create the debugging objects unless needed + msg_s = msg.as_string(policy=output_policy) + if len(msg_s) > 200: + dbg_output = "%s...%s" % (msg_s[:150], msg_s[-50:]) + else: + dbg_output = msg_s + self.ui.debug('imap', "savemessage: date: %s, content: '%s'" % + (date, dbg_output)) try: # Select folder for append and make the box READ-WRITE. @@ -695,7 +698,7 @@ class IMAPFolder(BaseFolder): except imapobj.readonly: # readonly exception. Return original uid to notify that # we did not save the message. (see savemessage in Base.py) - self.ui.msgtoreadonly(self, uid, content, flags) + self.ui.msgtoreadonly(self, uid) return uid # Do the APPEND. @@ -703,7 +706,7 @@ class IMAPFolder(BaseFolder): (typ, dat) = imapobj.append( self.getfullIMAPname(), imaputil.flagsmaildir2imap(flags), - date, bytes(content, 'utf-8')) + date, msg.as_bytes(policy=output_policy)) # This should only catch 'NO' responses since append() # will raise an exception for 'BAD' responses: if typ != 'OK': @@ -716,12 +719,12 @@ class IMAPFolder(BaseFolder): # In this case, we should immediately abort # the repository sync and continue # with the next account. - msg = \ + err_msg = \ "Saving msg (%s) in folder '%s', " \ "repository '%s' failed (abort). " \ "Server responded: %s %s\n" % \ (msg_id, self, self.getrepository(), typ, dat) - raise OfflineImapError(msg, OfflineImapError.ERROR.REPO) + raise OfflineImapError(err_msg, OfflineImapError.ERROR.REPO) retry_left = 0 # Mark as success. except imapobj.abort as e: # Connection has been reset, release connection and retry. @@ -832,7 +835,7 @@ class IMAPFolder(BaseFolder): """Fetches data from IMAP server. 
Arguments: - - uids: message UIDS + - uids: message UIDS (OfflineIMAP3: First UID returned only) - retry_num: number of retries to make Returns: data obtained by this query.""" @@ -888,9 +891,21 @@ class IMAPFolder(BaseFolder): "with UID '%s'" % (self.getrepository(), uids) raise OfflineImapError(reason, severity) - # Convert bytes to str + # JI: In offlineimap, this function returned a tuple of strings for each + # fetched UID, offlineimap3 calls to the imap object return bytes and so + # originally a fixed, utf-8 conversion was done and *only* the first + # response (d[0]) was returned. Note that this alters the behavior + # between code bases. However, it seems like a single UID is the intent + # of this function so retaining the modification here for now. + # + # TODO: Can we assume the server response containing the meta data is + # always 'utf-8' encoded? Assuming yes for now. + # + # Convert responses, d[0][0], into a 'utf-8' string (from bytes) and + # Convert email, d[0][1], into a message object (from bytes) + ndata0 = data[0][0].decode('utf-8') - ndata1 = data[0][1].decode('utf-8', errors='replace') + ndata1 = self.parser['8bit-RFC'].parsebytes(data[0][1]) ndata = [ndata0, ndata1] return ndata diff --git a/offlineimap/folder/LocalStatus.py b/offlineimap/folder/LocalStatus.py index 4f8d42d..035f9e0 100644 --- a/offlineimap/folder/LocalStatus.py +++ b/offlineimap/folder/LocalStatus.py @@ -190,7 +190,7 @@ class LocalStatusFolder(BaseFolder): os.close(fd) # Interface from BaseFolder - def savemessage(self, uid, content, flags, rtime, mtime=0, labels=None): + def savemessage(self, uid, msg, flags, rtime, mtime=0, labels=None): """Writes a new message, with the specified uid. See folder/Base for detail. 
Note that savemessage() does not diff --git a/offlineimap/folder/LocalStatusSQLite.py b/offlineimap/folder/LocalStatusSQLite.py index 27a9a81..a576b9c 100644 --- a/offlineimap/folder/LocalStatusSQLite.py +++ b/offlineimap/folder/LocalStatusSQLite.py @@ -323,7 +323,7 @@ class LocalStatusSQLiteFolder(BaseFolder): # assert False,"getmessageflags() called on non-existing message" # Interface from BaseFolder - def savemessage(self, uid, content, flags, rtime, mtime=0, labels=None): + def savemessage(self, uid, msg, flags, rtime, mtime=0, labels=None): """Writes a new message, with the specified uid. See folder/Base for detail. Note that savemessage() does not diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 0c44b60..74a4ab2 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -20,12 +20,10 @@ import socket import time import re import os -from pathlib import Path from sys import exc_info from threading import Lock from hashlib import md5 -import chardet -from offlineimap import OfflineImapError, emailutil +from offlineimap import OfflineImapError from .Base import BaseFolder # Find the UID in a message filename @@ -256,23 +254,14 @@ class MaildirFolder(BaseFolder): # Interface from BaseFolder def getmessage(self, uid): - """Return the content of the message.""" + """Returns an email message object.""" - # TODO: Perhaps, force the encoding using config file filename = self.messagelist[uid]['filename'] filepath = os.path.join(self.getfullname(), filename) - # Open the file as binary and read it - file = Path(filepath) - blob = file.read_bytes() - # Detect the encoding - detection = chardet.detect(blob) - encoding = detection["encoding"] - # Read the file as text - retval = blob.decode(encoding) - - # TODO: WHY are we replacing \r\n with \n here? And why do we - # read it as text? 
- return retval.replace("\r\n", "\n") + fd = open(filepath, 'rb') + retval = self.parser['8bit'].parse(fd) + fd.close() + return retval # Interface from BaseFolder def getmessagetime(self, uid): @@ -297,17 +286,21 @@ class MaildirFolder(BaseFolder): uid, self._foldermd5, self.infosep, ''.join(sorted(flags))) return uniq_name.replace(os.path.sep, self.sep_subst) - def save_to_tmp_file(self, filename, content): - """Saves given content to the named temporary file in the + def save_to_tmp_file(self, filename, msg, policy=None): + """Saves given message to the named temporary file in the 'tmp' subdirectory of $CWD. Arguments: - filename: name of the temporary file; - - content: data to be saved. + - msg: Email message object Returns: relative path to the temporary file that was created.""" + if policy is None: + output_policy = self.policy['8bit'] + else: + output_policy = policy tmpname = os.path.join('tmp', filename) # Open file and write it out. # XXX: why do we need to loop 7 times? @@ -333,8 +326,8 @@ class MaildirFolder(BaseFolder): else: raise - fd = os.fdopen(fd, 'wt') - fd.write(content) + fd = os.fdopen(fd, 'wb') + fd.write(msg.as_bytes(policy=output_policy)) # Make sure the data hits the disk. fd.flush() if self.dofsync(): @@ -344,7 +337,7 @@ class MaildirFolder(BaseFolder): return tmpname # Interface from BaseFolder - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Writes a new message, with the specified uid. See folder/Base for detail. 
Note that savemessage() does not @@ -368,15 +361,15 @@ class MaildirFolder(BaseFolder): message_timestamp = None if self._filename_use_mail_timestamp is not False: try: - message_timestamp = emailutil.get_message_date(content, 'Date') + message_timestamp = self.get_message_date(msg, 'Date') if message_timestamp is None: # Give a try with Delivery-date - message_timestamp = emailutil.get_message_date( - content, 'Delivery-date') + message_timestamp = self.get_message_date( + msg, 'Delivery-date') except Exception as e: # This should never happen. from offlineimap.ui import getglobalui - datestr = emailutil.get_message_date(content) + datestr = self.get_message_date(msg) ui = getglobalui() ui.warn("UID %d has invalid date %s: %s\n" "Not using message timestamp as file prefix" % @@ -384,11 +377,11 @@ class MaildirFolder(BaseFolder): # No need to check if message_timestamp is None here since it # would be overridden by _gettimeseq. messagename = self.new_message_filename(uid, flags, date=message_timestamp) - tmpname = self.save_to_tmp_file(messagename, content) + tmpname = self.save_to_tmp_file(messagename, msg) if self._utime_from_header is True: try: - date = emailutil.get_message_date(content, 'Date') + date = self.get_message_date(msg, 'Date') if date is not None: os.utime(os.path.join(self.getfullname(), tmpname), (date, date)) @@ -396,7 +389,7 @@ class MaildirFolder(BaseFolder): # int32. 
except Exception as e: from offlineimap.ui import getglobalui - datestr = emailutil.get_message_date(content) + datestr = self.get_message_date(msg) ui = getglobalui() ui.warn("UID %d has invalid date %s: %s\n" "Not changing file modification time" % (uid, datestr, e)) diff --git a/offlineimap/folder/UIDMaps.py b/offlineimap/folder/UIDMaps.py index fd495d4..740cdbb 100644 --- a/offlineimap/folder/UIDMaps.py +++ b/offlineimap/folder/UIDMaps.py @@ -236,11 +236,11 @@ class MappedIMAPFolder(IMAPFolder): # Interface from BaseFolder def getmessage(self, uid): - """Returns the content of the specified message.""" + """Returns the specified message.""" return self._mb.getmessage(self.r2l[uid]) # Interface from BaseFolder - def savemessage(self, uid, content, flags, rtime): + def savemessage(self, uid, msg, flags, rtime): """Writes a new message, with the specified uid. The UIDMaps class will not return a newly assigned uid, as it @@ -271,7 +271,7 @@ class MappedIMAPFolder(IMAPFolder): self.savemessageflags(uid, flags) return uid - newluid = self._mb.savemessage(-1, content, flags, rtime) + newluid = self._mb.savemessage(-1, msg, flags, rtime) if newluid < 1: raise OfflineImapError("server of repository '%s' did not return " "a valid UID (got '%s') for UID '%s' from '%s'" % ( diff --git a/offlineimap/ui/UIBase.py b/offlineimap/ui/UIBase.py index 0307b23..d49d564 100644 --- a/offlineimap/ui/UIBase.py +++ b/offlineimap/ui/UIBase.py @@ -231,6 +231,9 @@ class UIBase: else: self.invaliddebug(debugtype) + def is_debugging(self, debugtype): + return (debugtype in self.debuglist) + def debugging(self, debugtype): global debugtypes self.logger.debug("Now debugging for %s: %s" % (debugtype, @@ -266,7 +269,7 @@ class UIBase: (self.getnicename(x), x.getname()) for x in folder_list]) # WARNINGS - def msgtoreadonly(self, destfolder, uid, content, flags): + def msgtoreadonly(self, destfolder, uid): if self.config.has_option('general', 'ignore-readonly') and \ 
self.config.getboolean('general', 'ignore-readonly'): return diff --git a/requirements.txt b/requirements.txt index 19d3fff..77050d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ distro imaplib2~=3.5 urllib3~=1.25.9 certifi~=2020.6.20 -chardet~=3.0.4 \ No newline at end of file +