cde94e5047
Dear All, Attached is the patch that I have developed to provide maxage and maxsize options. You can thus sync only the last x days of messages and exclude large messages. All details in the attached git file. Regards, -Mike -- Attached file included as plaintext by Ecartis -- -- File: submit From 04fead2b46a79675a5b29de6f2b4088b9c9448e5 Mon Sep 17 00:00:00 2001 From: mike <mike@mikelaptop.(none)> Date: Sun, 16 Aug 2009 17:00:49 +0430 Subject: [PATCH] Patch to provide maxage and maxsize account options to exclude old/large messages This is designed to make offlineimap even better for low bandwidth connections. maxage allows you to specify a number of days and only messages within that range will be considered by offlineimap for the sync. This can be useful if you would like to start using offlineimap with a large existing account and do not want to import large archives of mail. maxsize allows you to specify the maximum size of a message to consider so that you can exclude messages with large attachments etc. In both cases the cachemessagelist function of the folder was modified to ignore messages that do not meet the criteria. If the criteria are not specified then the existing code will be executed the same as before. If a message does not meet the criteria it will be as though this message does not exist - offlineimap will completely ignore it. It will not have flags updated, it will not be deleted, it will not be considered at all. When operating against an IMAP repository a server side search function is used. This of course requires support for server side search. I have tested this with either option, no options etc. against IMAP, Maildir and Gmail. I have run variations of this patch here for the last 3 weeks or so syncing about 4 accounts normally.
309 lines
11 KiB
Python
309 lines
11 KiB
Python
# Maildir folder support
|
|
# Copyright (C) 2002 - 2007 John Goerzen
|
|
# <jgoerzen@complete.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
import os.path, os, re, time, socket
|
|
from Base import BaseFolder
|
|
from offlineimap import imaputil
|
|
from offlineimap.ui import UIBase
|
|
from threading import Lock
|
|
|
|
try:
|
|
from hashlib import md5
|
|
except ImportError:
|
|
from md5 import md5
|
|
|
|
uidmatchre = re.compile(',U=(\d+)')
|
|
flagmatchre = re.compile(':.*2,([A-Z]+)')
|
|
timestampmatchre = re.compile('(\d+)');
|
|
|
|
timeseq = 0
|
|
lasttime = long(0)
|
|
timelock = Lock()
|
|
|
|
def gettimeseq():
|
|
global lasttime, timeseq, timelock
|
|
timelock.acquire()
|
|
try:
|
|
thistime = long(time.time())
|
|
if thistime == lasttime:
|
|
timeseq += 1
|
|
return (thistime, timeseq)
|
|
else:
|
|
lasttime = thistime
|
|
timeseq = 0
|
|
return (thistime, timeseq)
|
|
finally:
|
|
timelock.release()
|
|
|
|
class MaildirFolder(BaseFolder):
|
|
def __init__(self, root, name, sep, repository, accountname, config):
|
|
self.name = name
|
|
self.config = config
|
|
self.dofsync = config.getdefaultboolean("general", "fsync", True)
|
|
self.root = root
|
|
self.sep = sep
|
|
self.messagelist = None
|
|
self.repository = repository
|
|
self.accountname = accountname
|
|
BaseFolder.__init__(self)
|
|
|
|
def getaccountname(self):
|
|
return self.accountname
|
|
|
|
def getfullname(self):
|
|
return os.path.join(self.getroot(), self.getname())
|
|
|
|
def getuidvalidity(self):
|
|
"""Maildirs have no notion of uidvalidity, so we just return a magic
|
|
token."""
|
|
return 42
|
|
|
|
#Checks to see if the given message is within the maximum age according
|
|
#to the maildir name which should begin with a timestamp
|
|
def _iswithinmaxage(self, messagename, maxage):
|
|
#In order to have the same behaviour as SINCE in an IMAP search
|
|
#we must convert this to the oldest time and then strip off hrs/mins
|
|
#from that day
|
|
oldest_time_utc = time.time() - (60*60*24*maxage)
|
|
oldest_time_struct = time.gmtime(oldest_time_utc)
|
|
oldest_time_today_seconds = ((oldest_time_struct[3] * 3600) \
|
|
+ (oldest_time_struct[4] * 60) \
|
|
+ oldest_time_struct[5])
|
|
oldest_time_utc -= oldest_time_today_seconds
|
|
|
|
timestampmatch = timestampmatchre.search(messagename)
|
|
timestampstr = timestampmatch.group()
|
|
timestamplong = long(timestampstr)
|
|
if(timestamplong < oldest_time_utc):
|
|
return False
|
|
else:
|
|
return True
|
|
|
|
|
|
def _scanfolder(self):
|
|
"""Cache the message list. Maildir flags are:
|
|
R (replied)
|
|
S (seen)
|
|
T (trashed)
|
|
D (draft)
|
|
F (flagged)
|
|
and must occur in ASCII order."""
|
|
retval = {}
|
|
files = []
|
|
nouidcounter = -1 # Messages without UIDs get
|
|
# negative UID numbers.
|
|
foldermd5 = md5(self.getvisiblename()).hexdigest()
|
|
folderstr = ',FMD5=' + foldermd5
|
|
for dirannex in ['new', 'cur']:
|
|
fulldirname = os.path.join(self.getfullname(), dirannex)
|
|
files.extend(os.path.join(fulldirname, filename) for
|
|
filename in os.listdir(fulldirname))
|
|
for file in files:
|
|
messagename = os.path.basename(file)
|
|
|
|
#check if there is a parameter for maxage / maxsize - then see if this
|
|
#message should be considered or not
|
|
maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1)
|
|
maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1)
|
|
|
|
if(maxage != -1):
|
|
isnewenough = self._iswithinmaxage(messagename, maxage)
|
|
if(isnewenough != True):
|
|
#this message is older than we should consider....
|
|
continue
|
|
|
|
#Check and see if the message is too big if the maxsize for this account is set
|
|
if(maxsize != -1):
|
|
filesize = os.path.getsize(file)
|
|
if(filesize > maxsize):
|
|
continue
|
|
|
|
|
|
foldermatch = messagename.find(folderstr) != -1
|
|
if not foldermatch:
|
|
# If there is no folder MD5 specified, or if it mismatches,
|
|
# assume it is a foreign (new) message and generate a
|
|
# negative uid for it
|
|
uid = nouidcounter
|
|
nouidcounter -= 1
|
|
else: # It comes from our folder.
|
|
uidmatch = uidmatchre.search(messagename)
|
|
uid = None
|
|
if not uidmatch:
|
|
uid = nouidcounter
|
|
nouidcounter -= 1
|
|
else:
|
|
uid = long(uidmatch.group(1))
|
|
flagmatch = flagmatchre.search(messagename)
|
|
flags = []
|
|
if flagmatch:
|
|
flags = [x for x in flagmatch.group(1)]
|
|
flags.sort()
|
|
retval[uid] = {'uid': uid,
|
|
'flags': flags,
|
|
'filename': file}
|
|
return retval
|
|
|
|
def quickchanged(self, statusfolder):
|
|
self.cachemessagelist()
|
|
savedmessages = statusfolder.getmessagelist()
|
|
if len(self.messagelist) != len(savedmessages):
|
|
return True
|
|
for uid in self.messagelist.keys():
|
|
if uid not in savedmessages:
|
|
return True
|
|
if self.messagelist[uid]['flags'] != savedmessages[uid]['flags']:
|
|
return True
|
|
return False
|
|
|
|
def cachemessagelist(self):
|
|
if self.messagelist is None:
|
|
self.messagelist = self._scanfolder()
|
|
|
|
def getmessagelist(self):
|
|
return self.messagelist
|
|
|
|
def getmessage(self, uid):
|
|
filename = self.messagelist[uid]['filename']
|
|
file = open(filename, 'rt')
|
|
retval = file.read()
|
|
file.close()
|
|
return retval.replace("\r\n", "\n")
|
|
|
|
def getmessagetime( self, uid ):
|
|
filename = self.messagelist[uid]['filename']
|
|
st = os.stat(filename)
|
|
return st.st_mtime
|
|
|
|
def savemessage(self, uid, content, flags, rtime):
|
|
# This function only ever saves to tmp/,
|
|
# but it calls savemessageflags() to actually save to cur/ or new/.
|
|
ui = UIBase.getglobalui()
|
|
ui.debug('maildir', 'savemessage: called to write with flags %s and content %s' % \
|
|
(repr(flags), repr(content)))
|
|
if uid < 0:
|
|
# We cannot assign a new uid.
|
|
return uid
|
|
if uid in self.messagelist:
|
|
# We already have it.
|
|
self.savemessageflags(uid, flags)
|
|
return uid
|
|
|
|
# Otherwise, save the message in tmp/ and then call savemessageflags()
|
|
# to give it a permanent home.
|
|
tmpdir = os.path.join(self.getfullname(), 'tmp')
|
|
messagename = None
|
|
attempts = 0
|
|
while 1:
|
|
if attempts > 15:
|
|
raise IOError, "Couldn't write to file %s" % messagename
|
|
timeval, timeseq = gettimeseq()
|
|
messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
|
|
(timeval,
|
|
timeseq,
|
|
os.getpid(),
|
|
socket.gethostname(),
|
|
uid,
|
|
md5(self.getvisiblename()).hexdigest())
|
|
if os.path.exists(os.path.join(tmpdir, messagename)):
|
|
time.sleep(2)
|
|
attempts += 1
|
|
else:
|
|
break
|
|
tmpmessagename = messagename.split(',')[0]
|
|
ui.debug('maildir', 'savemessage: using temporary name %s' % tmpmessagename)
|
|
file = open(os.path.join(tmpdir, tmpmessagename), "wt")
|
|
file.write(content)
|
|
|
|
# Make sure the data hits the disk
|
|
file.flush()
|
|
if self.dofsync:
|
|
os.fsync(file.fileno())
|
|
|
|
file.close()
|
|
if rtime != None:
|
|
os.utime(os.path.join(tmpdir,tmpmessagename), (rtime,rtime))
|
|
ui.debug('maildir', 'savemessage: moving from %s to %s' % \
|
|
(tmpmessagename, messagename))
|
|
if tmpmessagename != messagename: # then rename it
|
|
os.rename(os.path.join(tmpdir, tmpmessagename),
|
|
os.path.join(tmpdir, messagename))
|
|
|
|
if self.dofsync:
|
|
try:
|
|
# fsync the directory (safer semantics in Linux)
|
|
fd = os.open(tmpdir, os.O_RDONLY)
|
|
os.fsync(fd)
|
|
os.close(fd)
|
|
except:
|
|
pass
|
|
|
|
self.messagelist[uid] = {'uid': uid, 'flags': [],
|
|
'filename': os.path.join(tmpdir, messagename)}
|
|
self.savemessageflags(uid, flags)
|
|
ui.debug('maildir', 'savemessage: returning uid %d' % uid)
|
|
return uid
|
|
|
|
def getmessageflags(self, uid):
|
|
return self.messagelist[uid]['flags']
|
|
|
|
def savemessageflags(self, uid, flags):
|
|
oldfilename = self.messagelist[uid]['filename']
|
|
newpath, newname = os.path.split(oldfilename)
|
|
tmpdir = os.path.join(self.getfullname(), 'tmp')
|
|
if 'S' in flags:
|
|
# If a message has been seen, it goes into the cur
|
|
# directory. CR debian#152482, [complete.org #4]
|
|
newpath = os.path.join(self.getfullname(), 'cur')
|
|
else:
|
|
newpath = os.path.join(self.getfullname(), 'new')
|
|
infostr = ':'
|
|
infomatch = re.search('(:.*)$', newname)
|
|
if infomatch: # If the info string is present..
|
|
infostr = infomatch.group(1)
|
|
newname = newname.split(':')[0] # Strip off the info string.
|
|
infostr = re.sub('2,[A-Z]*', '', infostr)
|
|
flags.sort()
|
|
infostr += '2,' + ''.join(flags)
|
|
newname += infostr
|
|
|
|
newfilename = os.path.join(newpath, newname)
|
|
if (newfilename != oldfilename):
|
|
os.rename(oldfilename, newfilename)
|
|
self.messagelist[uid]['flags'] = flags
|
|
self.messagelist[uid]['filename'] = newfilename
|
|
|
|
# By now, the message had better not be in tmp/ land!
|
|
final_dir, final_name = os.path.split(self.messagelist[uid]['filename'])
|
|
assert final_dir != tmpdir
|
|
|
|
def deletemessage(self, uid):
|
|
if not uid in self.messagelist:
|
|
return
|
|
filename = self.messagelist[uid]['filename']
|
|
try:
|
|
os.unlink(filename)
|
|
except OSError:
|
|
# Can't find the file -- maybe already deleted?
|
|
newmsglist = self._scanfolder()
|
|
if uid in newmsglist: # Nope, try new filename.
|
|
os.unlink(newmsglist[uid]['filename'])
|
|
# Yep -- return.
|
|
del(self.messagelist[uid])
|
|
|