Patch for maxage and maxsize options

Dear All,
Attached is the patch that I have developed to provide maxage and
maxsize options.  You can thus sync only the last x days of messages and
exclude large messages.  All details in the attached git file.

Regards,

-Mike

-- Attached file included as plaintext by Ecartis --
-- File: submit

From 04fead2b46a79675a5b29de6f2b4088b9c9448e5 Mon Sep 17 00:00:00 2001
From: mike <mike@mikelaptop.(none)>
Date: Sun, 16 Aug 2009 17:00:49 +0430
Subject: [PATCH] Patch to provide maxage and maxsize account options to exclude old/large messages

This is designed to make offlineimap even better for low bandwidth connections.

maxage allows you to specify a number of days and only messages within
that range will be considered by offlineimap for the sync.  This can be
useful if you would like to start using offlineimap with a large
existing account and do not want to import large archives of mail.

maxsize allows you to specify the maximum size of a message to consider so
that you can exclude messages with large attachments etc.

In both cases the cachemessagelist function of the folder was modified to ignore
messages that do not meet the criteria.  If the criteria are not specified
then the existing code will be executed the same as before.  If a message
does not meet the criteria it will be as though this message does not exist
- offlineimap will completely ignore it.  It will not have flags updated,
it will not be deleted, it will not be considered at all.

When operating against an IMAP repository a server side search function
is used.  This of course requires support for server side search.

I have tested this with either option, no options etc. against IMAP, Maildir
and Gmail.  I have run variations of this patch here for the last 3 weeks or
so syncing about 4 accounts normally.
This commit is contained in:
Mike Dawson 2009-08-16 17:12:39 +04:30 committed by John Goerzen
parent 312b91a1a5
commit cde94e5047
4 changed files with 129 additions and 12 deletions

View File

@ -195,6 +195,30 @@ remoterepository = RemoteExample
# You can also specify parameters to the commands # You can also specify parameters to the commands
# presynchook = imapfilter -c someotherconfig.lua # presynchook = imapfilter -c someotherconfig.lua
# If you have a limited amount of bandwidth available you can exclude larger
# messages (e.g. those with large attachments etc). If you do this it
# will appear to offlineimap that these messages do not exist at all. They
# will not be copied, have flags changed etc. For this to work on an IMAP
# server the server must have server side search enabled. This works with gmail
# and most imap servers (e.g. cyrus etc)
# The maximum size should be specified in bytes - e.g. 2000000 for approx 2MB
# maxsize = 2000000
# When you are starting to sync an already existing account you can tell offlineimap
# to sync messages from only the last x days. When you do this messages older than x
# days will be completely ignored. This can be useful for importing existing accounts
# when you do not want to download large amounts of archive email.
# Messages older than maxage days will not be synced, their flags will
# not be changed, they will not be deleted etc. For offlineimap it will be like these
# messages do not exist. This will perform an IMAP search in the case of IMAP or Gmail
# and therefore requires that the server support server side searching. This will
# calculate the earliest day that would be included in the search and include all
# messages from that day until today. e.g. maxage = 3 to sync only the last 3 days mail
# maxage = 3
[Repository LocalExample] [Repository LocalExample]

View File

@ -183,15 +183,21 @@ class AccountSynchronizationMixin:
# Connect to the local cache. # Connect to the local cache.
self.statusrepos = offlineimap.repository.LocalStatus.LocalStatusRepository(self.getconf('localrepository'), self) self.statusrepos = offlineimap.repository.LocalStatus.LocalStatusRepository(self.getconf('localrepository'), self)
#might need changes here to ensure that one account sync does not crash others...
if not self.refreshperiod: if not self.refreshperiod:
self.sync(siglistener) self.sync(siglistener)
self.ui.acctdone(self.name) self.ui.acctdone(self.name)
return return
looping = 1 looping = 1
while looping: while looping:
self.sync(siglistener) self.sync(siglistener)
looping = self.sleeper(siglistener) != 2 looping = self.sleeper(siglistener) != 2
self.ui.acctdone(self.name) self.ui.acctdone(self.name)
def getaccountmeta(self): def getaccountmeta(self):
return os.path.join(self.metadatadir, 'Account-' + self.name) return os.path.join(self.metadatadir, 'Account-' + self.name)

View File

@ -23,6 +23,7 @@ from offlineimap.version import versionstr
import rfc822, time, string, random, binascii, re import rfc822, time, string, random, binascii, re
from StringIO import StringIO from StringIO import StringIO
from copy import copy from copy import copy
import time
class IMAPFolder(BaseFolder): class IMAPFolder(BaseFolder):
@ -115,6 +116,7 @@ class IMAPFolder(BaseFolder):
return False return False
# TODO: Make this so that it can define a date that would be the oldest messages etc.
def cachemessagelist(self): def cachemessagelist(self):
imapobj = self.imapserver.acquireconnection() imapobj = self.imapserver.acquireconnection()
self.messagelist = {} self.messagelist = {}
@ -122,20 +124,63 @@ class IMAPFolder(BaseFolder):
try: try:
# Primes untagged_responses # Primes untagged_responses
imapobj.select(self.getfullname(), readonly = 1, force = 1) imapobj.select(self.getfullname(), readonly = 1, force = 1)
try:
# Some mail servers do not return an EXISTS response if
# the folder is empty.
maxmsgid = long(imapobj.untagged_responses['EXISTS'][0])
except KeyError:
return
if maxmsgid < 1:
# No messages; return
return
maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1)
maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1)
if (maxage != -1) | (maxsize != -1):
try:
search_condition = "(";
if(maxage != -1):
#find out what the oldest message is that we should look at
oldest_time_struct = time.gmtime(time.time() - (60*60*24*maxage))
#format this manually - otherwise locales could cause problems
monthnames_standard = ["Jan", "Feb", "Mar", "Apr", "May", \
"June", "July", "Aug", "Sep", "Oct", "Nov", "Dec"]
our_monthname = monthnames_standard[oldest_time_struct[1]-1]
daystr = "%(day)02d" % {'day' : oldest_time_struct[2]}
date_search_str = "SINCE " + daystr + "-" + our_monthname \
+ "-" + str(oldest_time_struct[0])
search_condition += date_search_str
if(maxsize != -1):
if(maxage != 1): #There are two conditions - add a space
search_condition += " "
search_condition += "SMALLER " + self.config.getdefault("Account " + self.accountname, "maxsize", -1)
search_condition += ")"
searchresult = imapobj.search(None, search_condition)
#result would come back separated by space - to change into a fetch
#statement we need to change space to comma
messagesToFetch = searchresult[1][0].replace(" ", ",")
except KeyError:
return
if len(messagesToFetch) < 1:
# No messages; return
return
else:
try:
# Some mail servers do not return an EXISTS response if
# the folder is empty.
maxmsgid = long(imapobj.untagged_responses['EXISTS'][0])
messagesToFetch = '1:%d' % maxmsgid;
except KeyError:
return
if maxmsgid < 1:
#no messages; return
return
# Now, get the flags and UIDs for these. # Now, get the flags and UIDs for these.
# We could conceivably get rid of maxmsgid and just say # We could conceivably get rid of maxmsgid and just say
# '1:*' here. # '1:*' here.
response = imapobj.fetch('1:%d' % maxmsgid, '(FLAGS UID INTERNALDATE)')[1]
response = imapobj.fetch(messagesToFetch, '(FLAGS UID INTERNALDATE)')[1]
finally: finally:
self.imapserver.releaseconnection(imapobj) self.imapserver.releaseconnection(imapobj)
for messagestr in response: for messagestr in response:

View File

@ -29,6 +29,7 @@ except ImportError:
uidmatchre = re.compile(',U=(\d+)') uidmatchre = re.compile(',U=(\d+)')
flagmatchre = re.compile(':.*2,([A-Z]+)') flagmatchre = re.compile(':.*2,([A-Z]+)')
timestampmatchre = re.compile('(\d+)');
timeseq = 0 timeseq = 0
lasttime = long(0) lasttime = long(0)
@ -72,6 +73,28 @@ class MaildirFolder(BaseFolder):
token.""" token."""
return 42 return 42
#Checks to see if the given message is within the maximum age according
#to the maildir name which should begin with a timestamp
def _iswithinmaxage(self, messagename, maxage):
    """Return True if the message is recent enough to be considered.

    The message time is taken from the first run of digits found in
    the Maildir file name, interpreted as a Unix timestamp.

    messagename -- base name of the Maildir message file.
    maxage -- maximum age in days.

    Returns False only when that timestamp is earlier than 00:00:00 UTC
    on the day lying maxage days before now.  A name that contains no
    digits carries no age information; it is treated as within range so
    that the message is not dropped from the sync.
    """
    # In order to have the same behaviour as SINCE in an IMAP search,
    # the cutoff is the start of the (UTC) day that lies maxage days in
    # the past: take that instant and strip off its hrs/mins/secs.
    oldest_time_utc = time.time() - (60 * 60 * 24 * maxage)
    oldest_time_struct = time.gmtime(oldest_time_utc)
    oldest_time_today_seconds = ((oldest_time_struct[3] * 3600)
                                 + (oldest_time_struct[4] * 60)
                                 + oldest_time_struct[5])
    oldest_time_utc -= oldest_time_today_seconds

    timestampmatch = timestampmatchre.search(messagename)
    if timestampmatch is None:
        # No digits in the name: calling .group() here used to raise
        # AttributeError and abort the whole folder scan.
        return True
    return long(timestampmatch.group()) >= oldest_time_utc
def _scanfolder(self): def _scanfolder(self):
"""Cache the message list. Maildir flags are: """Cache the message list. Maildir flags are:
R (replied) R (replied)
@ -92,6 +115,25 @@ class MaildirFolder(BaseFolder):
filename in os.listdir(fulldirname)) filename in os.listdir(fulldirname))
for file in files: for file in files:
messagename = os.path.basename(file) messagename = os.path.basename(file)
#check if there is a parameter for maxage / maxsize - then see if this
#message should be considered or not
maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1)
maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1)
if(maxage != -1):
isnewenough = self._iswithinmaxage(messagename, maxage)
if(isnewenough != True):
#this message is older than we should consider....
continue
#Check and see if the message is too big if the maxsize for this account is set
if(maxsize != -1):
filesize = os.path.getsize(file)
if(filesize > maxsize):
continue
foldermatch = messagename.find(folderstr) != -1 foldermatch = messagename.find(folderstr) != -1
if not foldermatch: if not foldermatch:
# If there is no folder MD5 specified, or if it mismatches, # If there is no folder MD5 specified, or if it mismatches,