John Goerzen 3305d8cd4d Daniel Jacobowitz patches
fixes deb#433732

Date: Sun, 30 Sep 2007 13:54:56 -0400
From: Daniel Jacobowitz <drow@false.org>
To: offlineimap@complete.org
Subject: Assorted patches

Here's the result of a lazy Sunday hacking on offlineimap.  Sorry for
not breaking this into multiple patches.  They're mostly logically
independent so just ask if that would make a difference.
First, a new -q (quick) option.  The quick option means to only update
folders that seem to have had significant changes.  For Maildir, any
change to any message UID or flags is significant, because checking
the flags doesn't add a significant cost.  For IMAP, only a change to
the total number of messages or a change in the UID of the most recent
message is significant.  This should catch everything except for
flags changes.

The difference in bandwidth is astonishing: a quick sync takes 80K
instead of 5.3MB, and 28 seconds instead of 90.

There's a configuration variable that lets you say every tenth sync
should update flags, but let all the intervening ones be lighter.


Second, a fix to the UID validity problems many people have been
reporting with Courier.  As discussed in Debian bug #433732, I changed
the UID validity check to use SELECT unless the server complains that
the folder is read-only.  This avoids the Courier bug (see the Debian
log for more details).  This won't fix existing validity errors, you
need to remove the local status and validity files by hand and resync.


Third, some speedups in Maildir checking.  It's still pretty slow
due to a combination of poor performance in os.listdir (never reads
more than 4K of directory entries at a time) and some semaphore that
leads to lots of futex wake operations, but at least this saves
20% or so of the CPU time running offlineimap on a single folder:

Time with quick refresh and md5 in loop: 4.75s user 0.46s system 12%
cpu 41.751 total
Time with quick refresh and md5 out of loop: 4.38s user 0.50s system
14% cpu 34.799 total
Time using string compare to check folder: 4.11s user 0.47s system 13%
cpu 34.788 total


And fourth, some display fixes for Curses.Blinkenlights.  I made
warnings more visible, made the new quick sync message cyan, and
made all not explicitly colored messages grey.  That last one was
really bugging me.  Any time OfflineIMAP printed a warning in
this UI, it had even odds of coming out black on black!


Anyway, I hope these are useful.  I'm happy to revise them if you see
a problem.

-- 
Daniel Jacobowitz
CodeSourcery
2007-10-01 22:20:37 +01:00

259 lines
9.4 KiB
Python

# Maildir folder support
# Copyright (C) 2002 - 2007 John Goerzen
# <jgoerzen@complete.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
from Base import BaseFolder
from offlineimap import imaputil
from offlineimap.ui import UIBase
from threading import Lock
import os.path, os, re, time, socket, md5
uidmatchre = re.compile(',U=(\d+)')
flagmatchre = re.compile(':.*2,([A-Z]+)')
timeseq = 0
lasttime = long(0)
timelock = Lock()
def gettimeseq():
global lasttime, timeseq, timelock
timelock.acquire()
try:
thistime = long(time.time())
if thistime == lasttime:
timeseq += 1
return (thistime, timeseq)
else:
lasttime = thistime
timeseq = 0
return (thistime, timeseq)
finally:
timelock.release()
class MaildirFolder(BaseFolder):
def __init__(self, root, name, sep, repository, accountname):
self.name = name
self.root = root
self.sep = sep
self.messagelist = None
self.repository = repository
self.accountname = accountname
BaseFolder.__init__(self)
def getaccountname(self):
return self.accountname
def getfullname(self):
return os.path.join(self.getroot(), self.getname())
def getuidvalidity(self):
"""Maildirs have no notion of uidvalidity, so we just return a magic
token."""
return 42
def _scanfolder(self):
"""Cache the message list. Maildir flags are:
R (replied)
S (seen)
T (trashed)
D (draft)
F (flagged)
and must occur in ASCII order."""
retval = {}
files = []
nouidcounter = -1 # Messages without UIDs get
# negative UID numbers.
foldermd5 = md5.new(self.getvisiblename()).hexdigest()
folderstr = ',FMD5=' + foldermd5
for dirannex in ['new', 'cur']:
fulldirname = os.path.join(self.getfullname(), dirannex)
files.extend([os.path.join(fulldirname, filename) for
filename in os.listdir(fulldirname)])
for file in files:
messagename = os.path.basename(file)
foldermatch = messagename.find(folderstr) != -1
if not foldermatch:
# If there is no folder MD5 specified, or if it mismatches,
# assume it is a foreign (new) message and generate a
# negative uid for it
uid = nouidcounter
nouidcounter -= 1
else: # It comes from our folder.
uidmatch = uidmatchre.search(messagename)
uid = None
if not uidmatch:
uid = nouidcounter
nouidcounter -= 1
else:
uid = long(uidmatch.group(1))
flagmatch = flagmatchre.search(messagename)
flags = []
if flagmatch:
flags = [x for x in flagmatch.group(1)]
flags.sort()
retval[uid] = {'uid': uid,
'flags': flags,
'filename': file}
return retval
def quickchanged(self, statusfolder):
self.cachemessagelist()
savedmessages = statusfolder.getmessagelist()
if len(self.messagelist) != len(savedmessages):
return True
for uid in self.messagelist.keys():
if uid not in savedmessages:
return True
if self.messagelist[uid]['flags'] != savedmessages[uid]['flags']:
return True
return False
def cachemessagelist(self):
if self.messagelist is None:
self.messagelist = self._scanfolder()
def getmessagelist(self):
return self.messagelist
def getmessage(self, uid):
filename = self.messagelist[uid]['filename']
file = open(filename, 'rt')
retval = file.read()
file.close()
return retval.replace("\r\n", "\n")
def getmessagetime( self, uid ):
filename = self.messagelist[uid]['filename']
st = os.stat(filename)
return st.st_mtime
def savemessage(self, uid, content, flags, rtime):
# This function only ever saves to tmp/,
# but it calls savemessageflags() to actually save to cur/ or new/.
ui = UIBase.getglobalui()
ui.debug('maildir', 'savemessage: called to write with flags %s and content %s' % \
(repr(flags), repr(content)))
if uid < 0:
# We cannot assign a new uid.
return uid
if uid in self.messagelist:
# We already have it.
self.savemessageflags(uid, flags)
return uid
# Otherwise, save the message in tmp/ and then call savemessageflags()
# to give it a permanent home.
tmpdir = os.path.join(self.getfullname(), 'tmp')
messagename = None
attempts = 0
while 1:
if attempts > 15:
raise IOError, "Couldn't write to file %s" % messagename
timeval, timeseq = gettimeseq()
messagename = '%d_%d.%d.%s,U=%d,FMD5=%s' % \
(timeval,
timeseq,
os.getpid(),
socket.gethostname(),
uid,
md5.new(self.getvisiblename()).hexdigest())
if os.path.exists(os.path.join(tmpdir, messagename)):
time.sleep(2)
attempts += 1
else:
break
tmpmessagename = messagename.split(',')[0]
ui.debug('maildir', 'savemessage: using temporary name %s' % tmpmessagename)
file = open(os.path.join(tmpdir, tmpmessagename), "wt")
file.write(content)
# Make sure the data hits the disk
file.flush()
os.fsync(file.fileno())
file.close()
if rtime != None:
os.utime(os.path.join(tmpdir,tmpmessagename), (rtime,rtime))
ui.debug('maildir', 'savemessage: moving from %s to %s' % \
(tmpmessagename, messagename))
if tmpmessagename != messagename: # then rename it
os.link(os.path.join(tmpdir, tmpmessagename),
os.path.join(tmpdir, messagename))
os.unlink(os.path.join(tmpdir, tmpmessagename))
try:
# fsync the directory (safer semantics in Linux)
fd = os.open(tmpdir, os.O_RDONLY)
os.fsync(fd)
os.close(fd)
except:
pass
self.messagelist[uid] = {'uid': uid, 'flags': [],
'filename': os.path.join(tmpdir, messagename)}
self.savemessageflags(uid, flags)
ui.debug('maildir', 'savemessage: returning uid %d' % uid)
return uid
def getmessageflags(self, uid):
return self.messagelist[uid]['flags']
def savemessageflags(self, uid, flags):
oldfilename = self.messagelist[uid]['filename']
newpath, newname = os.path.split(oldfilename)
tmpdir = os.path.join(self.getfullname(), 'tmp')
if 'S' in flags:
# If a message has been seen, it goes into the cur
# directory. CR debian#152482, [complete.org #4]
newpath = os.path.join(self.getfullname(), 'cur')
else:
newpath = os.path.join(self.getfullname(), 'new')
infostr = ':'
infomatch = re.search('(:.*)$', newname)
if infomatch: # If the info string is present..
infostr = infomatch.group(1)
newname = newname.split(':')[0] # Strip off the info string.
infostr = re.sub('2,[A-Z]*', '', infostr)
flags.sort()
infostr += '2,' + ''.join(flags)
newname += infostr
newfilename = os.path.join(newpath, newname)
if (newfilename != oldfilename):
os.rename(oldfilename, newfilename)
self.messagelist[uid]['flags'] = flags
self.messagelist[uid]['filename'] = newfilename
# By now, the message had better not be in tmp/ land!
final_dir, final_name = os.path.split(self.messagelist[uid]['filename'])
assert final_dir != tmpdir
def deletemessage(self, uid):
if not uid in self.messagelist:
return
filename = self.messagelist[uid]['filename']
try:
os.unlink(filename)
except OSError:
# Can't find the file -- maybe already deleted?
newmsglist = self._scanfolder()
if uid in newmsglist: # Nope, try new filename.
os.unlink(newmsglist[uid]['filename'])
# Yep -- return.
del(self.messagelist[uid])