maxage: fix timezone issues, remove IMAP-IMAP support, add startdate option
1. When using maxage, local and remote messagelists are supposed to only contain messages from at most maxage days ago. But local and remote used different timezones to calculate what "maxage days ago" means, resulting in removals on one side. Now, we ask the local folder for maxage days' worth of mail, find the lowest UID, and then ask the remote folder for all UIDs starting with that lowest one. 2. maxage was fundamentally wrong in the IMAP-IMAP case: it assumed that remote messages have UIDs in the same order as their local counterparts, which could be false, e.g. when messages are copied in quick succession. So, remove support for maxage in the IMAP-IMAP case. 3. Add startdate option for IMAP-IMAP syncs: use messages from the given repository starting at startdate, and all messages from the other repository. In the first sync, the other repository must be empty. 4. Allow maxage to be specified either as a number of days to sync (as previously) or as a fixed date. Signed-off-by: Janna Martl <janna.martl109@gmail.com> Signed-off-by: Nicolas Sebrecht <nicolas.s-dev@laposte.net>
This commit is contained in:

committed by
Nicolas Sebrecht

parent
71693b7d8c
commit
8096f6cd5b
@ -17,6 +17,7 @@
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import time
|
||||
from sys import exc_info
|
||||
|
||||
from offlineimap import threadutil
|
||||
@ -298,6 +299,76 @@ class BaseFolder(object):
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def getmaxage(self):
    """Return the cutoff date configured by the account's maxage option.

    maxage is allowed to be either a positive integer (a number of days
    counted back from the current time) or a fixed date of the form
    YYYY-mm-dd.

    :returns: a time_struct marking the oldest date to consider, or None
        when the maxage option is not set.
    :raises OfflineImapError: (severity MESSAGE) if maxage is an integer
        lower than 1, is neither an integer nor a YYYY-mm-dd date, cannot
        be converted to a date, or leads to a year before 1900."""

    maxagestr = self.config.getdefault(
        "Account %s"% self.accountname, "maxage", None)
    if maxagestr is None:
        return None

    # Is it a number of days? Only the conversion itself is guarded so
    # that a ValueError can mean nothing else than "not an integer".
    try:
        maxage = int(maxagestr)
    except ValueError:
        maxage = None  # Maybe it is a date.

    if maxage is not None:
        if maxage < 1:
            raise OfflineImapError("invalid maxage value %d"% maxage,
                OfflineImapError.ERROR.MESSAGE)
        try:
            date = time.gmtime(time.time() - 60*60*24*maxage)
        except (ValueError, OverflowError, OSError):
            # The day count is so large that the platform cannot
            # represent the resulting timestamp.
            raise OfflineImapError("invalid maxage value %d"% maxage,
                OfflineImapError.ERROR.MESSAGE)
    else:
        # Is it a date string?
        try:
            date = time.strptime(maxagestr, "%Y-%m-%d")
        except ValueError:
            raise OfflineImapError("invalid maxage value %s"% maxagestr,
                OfflineImapError.ERROR.MESSAGE)

    # Applies to both forms: a large day count can reach back before 1900
    # just as well as an explicit date can.
    if date[0] < 1900:
        raise OfflineImapError("maxage led to year %d. "
            "Abort syncing."% date[0],
            OfflineImapError.ERROR.MESSAGE)
    return date
|
||||
|
||||
def getmaxsize(self):
    """Return the value of the account's maxsize configuration option.

    The option is read through config.getdefaultint() from the
    "Account <accountname>" section, with None as the default."""

    section = "Account %s"% self.accountname
    return self.config.getdefaultint(section, "maxsize", None)
|
||||
|
||||
def getstartdate(self):
    """Retrieve the value of the configuration option startdate.

    The option is read from the "Repository <repository name>" section
    and must be a date of the form YYYY-mm-dd.

    :returns: a time_struct for the configured date, or None when the
        option is unset or empty.
    :raises OfflineImapError: (severity MESSAGE) if the value is not a
        valid YYYY-mm-dd date or its year is before 1900."""

    datestr = self.config.getdefault("Repository " + self.repository.name,
        'startdate', None)
    if not datestr:
        return None

    try:
        date = time.strptime(datestr, "%Y-%m-%d")
    except ValueError:
        # Report the offending value; the format string used to be
        # emitted with a literal, uninterpolated "%s".
        raise OfflineImapError("invalid startdate value %s"% datestr,
            OfflineImapError.ERROR.MESSAGE)

    if date[0] < 1900:
        raise OfflineImapError("startdate led to year %d. "
            "Abort syncing."% date[0],
            OfflineImapError.ERROR.MESSAGE)
    return date
|
||||
|
||||
def get_min_uid_file(self):
    """Return the path of the file holding this folder's minimum UID.

    The file is located in
    <metadatadir>/Repository-<repository name>/StartUID/ and is named
    after self.getfolderbasename(). The StartUID directory is created
    with mode 0700 when it does not exist yet; the file itself is not
    created here.

    :raises OSError: if the StartUID directory is missing and cannot be
        created."""

    startuiddir = os.path.join(self.config.getmetadatadir(),
        'Repository-' + self.repository.name, 'StartUID')
    # Create first and inspect the failure afterwards: checking for
    # existence before calling mkdir leaves a window in which another
    # thread can create the directory and make mkdir fail.
    try:
        os.mkdir(startuiddir, 0o700)
    except OSError:
        if not os.path.isdir(startuiddir):
            raise
    return os.path.join(startuiddir, self.getfolderbasename())
|
||||
|
||||
def retrieve_min_uid(self):
    """Read the minimum UID stored for this folder.

    The value is taken from the first line of the file returned by
    self.get_min_uid_file().

    :returns: the stored UID as an integer, or None when the file does
        not exist.
    :raises IOError: if the file exists but cannot be read or its first
        line is not an integer."""

    uidfile = self.get_min_uid_file()
    if not os.path.exists(uidfile):
        return None
    try:
        # The context manager closes the file even when parsing fails.
        with open(uidfile, 'rt') as fd:
            # int() yields an arbitrary-precision integer where needed,
            # so no explicit long() conversion is required.
            return int(fd.readline().strip())
    except (IOError, OSError, ValueError):
        raise IOError("Can't read %s"% uidfile)
|
||||
|
||||
|
||||
def savemessage(self, uid, content, flags, rtime):
|
||||
"""Writes a new message, with the specified uid.
|
||||
|
||||
|
@ -121,16 +121,18 @@ class GmailFolder(IMAPFolder):
|
||||
|
||||
# TODO: merge this code with the parent's cachemessagelist:
|
||||
# TODO: they have too much common logics.
|
||||
def cachemessagelist(self):
|
||||
def cachemessagelist(self, min_date=None, min_uid=None):
|
||||
if not self.synclabels:
|
||||
return super(GmailFolder, self).cachemessagelist()
|
||||
return super(GmailFolder, self).cachemessagelist(
|
||||
min_date=min_date, min_uid=min_uid)
|
||||
|
||||
self.messagelist = {}
|
||||
|
||||
self.ui.collectingdata(None, self)
|
||||
imapobj = self.imapserver.acquireconnection()
|
||||
try:
|
||||
msgsToFetch = self._msgs_to_fetch(imapobj)
|
||||
msgsToFetch = self._msgs_to_fetch(
|
||||
imapobj, min_date=min_date, min_uid=min_uid)
|
||||
if not msgsToFetch:
|
||||
return # No messages to sync
|
||||
|
||||
|
@ -64,9 +64,9 @@ class GmailMaildirFolder(MaildirFolder):
|
||||
'filename': '/no-dir/no-such-file/', 'mtime': 0}
|
||||
|
||||
|
||||
def cachemessagelist(self):
|
||||
def cachemessagelist(self, min_date=None, min_uid=None):
|
||||
if self.ismessagelistempty():
|
||||
self.messagelist = self._scanfolder()
|
||||
self.messagelist = self._scanfolder(min_date=min_date, min_uid=min_uid)
|
||||
|
||||
# Get mtimes
|
||||
if self.synclabels:
|
||||
|
@ -18,6 +18,7 @@
|
||||
import random
|
||||
import binascii
|
||||
import re
|
||||
import os
|
||||
import time
|
||||
from sys import exc_info
|
||||
|
||||
@ -79,6 +80,12 @@ class IMAPFolder(BaseFolder):
|
||||
def waitforthread(self):
|
||||
self.imapserver.connectionwait()
|
||||
|
||||
def getmaxage(self):
    """Refuse the maxage option, which this folder type does not support.

    :returns: None when the "Account <accountname>" section has no
        (or an empty) maxage option.
    :raises OfflineImapError: (severity REPO) when maxage is set."""

    if self.config.getdefault("Account %s"%
        self.accountname, "maxage", None):
        # No exception is being handled at this point, so there is no
        # traceback worth attaching: a plain raise is all that is needed.
        raise OfflineImapError("maxage is not supported on IMAP-IMAP sync",
            OfflineImapError.ERROR.REPO)
    return None
|
||||
|
||||
# Interface from BaseFolder
|
||||
def getcopyinstancelimit(self):
|
||||
return 'MSGCOPY_' + self.repository.getname()
|
||||
@ -143,8 +150,7 @@ class IMAPFolder(BaseFolder):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _msgs_to_fetch(self, imapobj):
|
||||
def _msgs_to_fetch(self, imapobj, min_date=None, min_uid=None):
|
||||
"""Determines sequence numbers of messages to be fetched.
|
||||
|
||||
Message sequence numbers (MSNs) are more easily compacted
|
||||
@ -152,57 +158,55 @@ class IMAPFolder(BaseFolder):
|
||||
|
||||
Arguments:
|
||||
- imapobj: instance of IMAPlib
|
||||
- min_date (optional): a time_struct; only fetch messages newer than this
|
||||
- min_uid (optional): only fetch messages with UID >= min_uid
|
||||
|
||||
This function should be called with at MOST one of min_date OR
|
||||
min_uid set but not BOTH.
|
||||
|
||||
Returns: range(s) for messages or None if no messages
|
||||
are to be fetched."""
|
||||
|
||||
res_type, imapdata = imapobj.select(self.getfullname(), True, True)
|
||||
if imapdata == [None] or imapdata[0] == '0':
|
||||
# Empty folder, no need to populate message list
|
||||
return None
|
||||
def search(search_conditions):
|
||||
"""Actually request the server with the specified conditions.
|
||||
|
||||
# By default examine all messages in this folder
|
||||
msgsToFetch = '1:*'
|
||||
|
||||
maxage = self.config.getdefaultint(
|
||||
"Account %s"% self.accountname, "maxage", -1)
|
||||
maxsize = self.config.getdefaultint(
|
||||
"Account %s"% self.accountname, "maxsize", -1)
|
||||
|
||||
# Build search condition
|
||||
if (maxage != -1) | (maxsize != -1):
|
||||
search_cond = "(";
|
||||
|
||||
if(maxage != -1):
|
||||
#find out what the oldest message is that we should look at
|
||||
oldest_struct = time.gmtime(time.time() - (60*60*24*maxage))
|
||||
if oldest_struct[0] < 1900:
|
||||
raise OfflineImapError("maxage setting led to year %d. "
|
||||
"Abort syncing."% oldest_struct[0],
|
||||
OfflineImapError.ERROR.REPO)
|
||||
search_cond += "SINCE %02d-%s-%d"% (
|
||||
oldest_struct[2],
|
||||
MonthNames[oldest_struct[1]],
|
||||
oldest_struct[0])
|
||||
|
||||
if(maxsize != -1):
|
||||
if(maxage != -1): # There are two conditions, add space
|
||||
search_cond += " "
|
||||
search_cond += "SMALLER %d"% maxsize
|
||||
|
||||
search_cond += ")"
|
||||
|
||||
res_type, res_data = imapobj.search(None, search_cond)
|
||||
Returns: range(s) for messages or None if no messages
|
||||
are to be fetched."""
|
||||
res_type, res_data = imapobj.search(None, search_conditions)
|
||||
if res_type != 'OK':
|
||||
raise OfflineImapError("SEARCH in folder [%s]%s failed. "
|
||||
"Search string was '%s'. Server responded '[%s] %s'"% (
|
||||
self.getrepository(), self, search_cond, res_type, res_data),
|
||||
OfflineImapError.ERROR.FOLDER)
|
||||
return res_data[0].split()
|
||||
|
||||
# Resulting MSN are separated by space, coalesce into ranges
|
||||
msgsToFetch = imaputil.uid_sequence(res_data[0].split())
|
||||
res_type, imapdata = imapobj.select(self.getfullname(), True, True)
|
||||
if imapdata == [None] or imapdata[0] == '0':
|
||||
# Empty folder, no need to populate message list.
|
||||
return None
|
||||
|
||||
return msgsToFetch
|
||||
conditions = []
|
||||
# 1. min_uid condition.
|
||||
if min_uid != None:
|
||||
conditions.append("UID %d:*"% min_uid)
|
||||
# 2. date condition.
|
||||
elif min_date != None:
|
||||
# Find out what the oldest message is that we should look at.
|
||||
conditions.append("SINCE %02d-%s-%d"% (
|
||||
min_date[2], MonthNames[min_date[1]], min_date[0]))
|
||||
# 3. maxsize condition.
|
||||
maxsize = self.getmaxsize()
|
||||
if maxsize != None:
|
||||
conditions.append("SMALLER %d"% maxsize)
|
||||
|
||||
if len(conditions) >= 1:
|
||||
# Build SEARCH command.
|
||||
search_cond = "(%s)"% ' '.join(conditions)
|
||||
search_result = search(search_cond)
|
||||
return imaputil.uid_sequence(search_result)
|
||||
|
||||
# By default consider all messages in this folder.
|
||||
return '1:*'
|
||||
|
||||
# Interface from BaseFolder
|
||||
def msglist_item_initializer(self, uid):
|
||||
@ -210,19 +214,21 @@ class IMAPFolder(BaseFolder):
|
||||
|
||||
|
||||
# Interface from BaseFolder
|
||||
def cachemessagelist(self):
|
||||
def cachemessagelist(self, min_date=None, min_uid=None):
|
||||
self.ui.loadmessagelist(self.repository, self)
|
||||
self.messagelist = {}
|
||||
|
||||
imapobj = self.imapserver.acquireconnection()
|
||||
try:
|
||||
msgsToFetch = self._msgs_to_fetch(imapobj)
|
||||
msgsToFetch = self._msgs_to_fetch(
|
||||
imapobj, min_date=min_date, min_uid=min_uid)
|
||||
if not msgsToFetch:
|
||||
return # No messages to sync
|
||||
|
||||
# Get the flags and UIDs for these. single-quotes prevent
|
||||
# imaplib2 from quoting the sequence.
|
||||
res_type, response = imapobj.fetch("'%s'"%
|
||||
msgsToFetch, '(FLAGS UID)')
|
||||
msgsToFetch, '(FLAGS UID INTERNALDATE)')
|
||||
if res_type != 'OK':
|
||||
raise OfflineImapError("FETCHING UIDs in folder [%s]%s failed. "
|
||||
"Server responded '[%s] %s'"% (self.getrepository(), self,
|
||||
@ -247,6 +253,7 @@ class IMAPFolder(BaseFolder):
|
||||
flags = imaputil.flagsimap2maildir(options['FLAGS'])
|
||||
rtime = imaplibutil.Internaldate2epoch(messagestr)
|
||||
self.messagelist[uid] = {'uid': uid, 'flags': flags, 'time': rtime}
|
||||
self.ui.messagelistloaded(self.repository, self, self.getmessagecount())
|
||||
|
||||
def dropmessagelistcache(self):
|
||||
self.messagelist = {}
|
||||
|
@ -92,25 +92,17 @@ class MaildirFolder(BaseFolder):
|
||||
token."""
|
||||
return 42
|
||||
|
||||
# Checks to see if the given message is within the maximum age according
|
||||
# to the maildir name which should begin with a timestamp
|
||||
def _iswithinmaxage(self, messagename, maxage):
|
||||
# In order to have the same behaviour as SINCE in an IMAP search
|
||||
# we must convert this to the oldest time and then strip off hrs/mins
|
||||
# from that day.
|
||||
oldest_time_utc = time.time() - (60*60*24*maxage)
|
||||
oldest_time_struct = time.gmtime(oldest_time_utc)
|
||||
oldest_time_today_seconds = ((oldest_time_struct[3] * 3600) \
|
||||
+ (oldest_time_struct[4] * 60) \
|
||||
+ oldest_time_struct[5])
|
||||
oldest_time_utc -= oldest_time_today_seconds
|
||||
def _iswithintime(self, messagename, date):
|
||||
"""Check to see if the given message is newer than date (a
|
||||
time_struct) according to the maildir name which should begin
|
||||
with a timestamp."""
|
||||
|
||||
timestampmatch = re_timestampmatch.search(messagename)
|
||||
if not timestampmatch:
|
||||
return True
|
||||
timestampstr = timestampmatch.group()
|
||||
timestamplong = long(timestampstr)
|
||||
if(timestamplong < oldest_time_utc):
|
||||
if(timestamplong < time.mktime(date)):
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
@ -151,18 +143,21 @@ class MaildirFolder(BaseFolder):
|
||||
flags = set((c for c in flagmatch.group(1) if not c.islower()))
|
||||
return prefix, uid, fmd5, flags
|
||||
|
||||
def _scanfolder(self):
|
||||
def _scanfolder(self, min_date=None, min_uid=None):
|
||||
"""Cache the message list from a Maildir.
|
||||
|
||||
If min_date is set, this finds the min UID of all messages newer than
|
||||
min_date and uses it as the real cutoff for considering messages.
|
||||
This handles the edge cases where the date is much earlier than messages
|
||||
with similar UID's (e.g. the UID was reassigned much later).
|
||||
|
||||
Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F
|
||||
(flagged).
|
||||
:returns: dict that can be used as self.messagelist.
|
||||
"""
|
||||
|
||||
maxage = self.config.getdefaultint("Account " + self.accountname,
|
||||
"maxage", None)
|
||||
maxsize = self.config.getdefaultint("Account " + self.accountname,
|
||||
"maxsize", None)
|
||||
maxsize = self.getmaxsize()
|
||||
|
||||
retval = {}
|
||||
files = []
|
||||
nouidcounter = -1 # Messages without UIDs get negative UIDs.
|
||||
@ -171,12 +166,11 @@ class MaildirFolder(BaseFolder):
|
||||
files.extend((dirannex, filename) for
|
||||
filename in os.listdir(fulldirname))
|
||||
|
||||
date_excludees = {}
|
||||
for dirannex, filename in files:
|
||||
# We store just dirannex and filename, ie 'cur/123...'
|
||||
filepath = os.path.join(dirannex, filename)
|
||||
# Check maxage/maxsize if this message should be considered.
|
||||
if maxage and not self._iswithinmaxage(filename, maxage):
|
||||
continue
|
||||
# Check maxsize if this message should be considered.
|
||||
if maxsize and (os.path.getsize(os.path.join(
|
||||
self.getfullname(), filepath)) > maxsize):
|
||||
continue
|
||||
@ -193,16 +187,43 @@ class MaildirFolder(BaseFolder):
|
||||
nouidcounter -= 1
|
||||
else:
|
||||
uid = long(uidmatch.group(1))
|
||||
# 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
|
||||
retval[uid] = self.msglist_item_initializer(uid)
|
||||
retval[uid]['flags'] = flags
|
||||
retval[uid]['filename'] = filepath
|
||||
if min_uid != None and uid > 0 and uid < min_uid:
|
||||
continue
|
||||
if min_date != None and not self._iswithintime(filename, min_date):
|
||||
# Keep track of messages outside of the time limit, because they
|
||||
# still might have UID > min(UIDs of within-min_date). We hit
|
||||
# this case for maxage if any message had a known/valid datetime
|
||||
# and was re-uploaded because the UID in the filename got lost
|
||||
# (e.g. local copy/move). On next sync, it was assigned a new
|
||||
# UID from the server and will be included in the SEARCH
|
||||
# condition. So, we must re-include them later in this method
|
||||
# in order to avoid inconsistent lists of messages.
|
||||
date_excludees[uid] = self.msglist_item_initializer(uid)
|
||||
date_excludees[uid]['flags'] = flags
|
||||
date_excludees[uid]['filename'] = filepath
|
||||
else:
|
||||
# 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
|
||||
retval[uid] = self.msglist_item_initializer(uid)
|
||||
retval[uid]['flags'] = flags
|
||||
retval[uid]['filename'] = filepath
|
||||
if min_date != None:
|
||||
# Re-include messages with high enough uid's.
|
||||
positive_uids = filter(lambda uid: uid > 0, retval)
|
||||
if positive_uids:
|
||||
min_uid = min(positive_uids)
|
||||
for uid in date_excludees.keys():
|
||||
if uid > min_uid:
|
||||
# This message was originally excluded because of
|
||||
# its date. It is re-included now because we want all
|
||||
# messages with UID > min_uid.
|
||||
retval[uid] = date_excludees[uid]
|
||||
return retval
|
||||
|
||||
# Interface from BaseFolder
|
||||
def quickchanged(self, statusfolder):
|
||||
"""Returns True if the Maildir has changed"""
|
||||
self.cachemessagelist()
|
||||
"""Returns True if the Maildir has changed
|
||||
|
||||
Assumes cachemessagelist() has already been called """
|
||||
# Folder has different uids than statusfolder => TRUE.
|
||||
if sorted(self.getmessageuidlist()) != \
|
||||
sorted(statusfolder.getmessageuidlist()):
|
||||
@ -219,9 +240,12 @@ class MaildirFolder(BaseFolder):
|
||||
return {'flags': set(), 'filename': '/no-dir/no-such-file/'}
|
||||
|
||||
# Interface from BaseFolder
|
||||
def cachemessagelist(self):
|
||||
def cachemessagelist(self, min_date=None, min_uid=None):
|
||||
if self.ismessagelistempty():
|
||||
self.messagelist = self._scanfolder()
|
||||
self.ui.loadmessagelist(self.repository, self)
|
||||
self.messagelist = self._scanfolder(min_date=min_date,
|
||||
min_uid=min_uid)
|
||||
self.ui.messagelistloaded(self.repository, self, self.getmessagecount())
|
||||
|
||||
# Interface from BaseFolder
|
||||
def getmessagelist(self):
|
||||
|
@ -94,9 +94,10 @@ class MappedIMAPFolder(IMAPFolder):
|
||||
OfflineImapError.ERROR.MESSAGE), None, exc_info()[2]
|
||||
|
||||
# Interface from BaseFolder
|
||||
def cachemessagelist(self):
|
||||
self._mb.cachemessagelist()
|
||||
def cachemessagelist(self, min_date=None, min_uid=None):
|
||||
self._mb.cachemessagelist(min_date=min_date, min_uid=min_uid)
|
||||
reallist = self._mb.getmessagelist()
|
||||
self.messagelist = self._mb.messagelist
|
||||
|
||||
self.maplock.acquire()
|
||||
try:
|
||||
|
Reference in New Issue
Block a user