Refactor parsing of Maildir filename components

Create a helper function that retrieves the prefix, UID, folder MD5, and
flags from a message filename.

We need these components later, when we simply want to rename a Maildir
message file (i.e. give it a new UID). The new function provides them.

Also rework the code so that the folder's MD5 value is calculated only once
and cached: it never changes, and we need it a lot.

Signed-off-by: Sebastian Spaeth <Sebastian@SSpaeth.de>
This commit is contained in:
Sebastian Spaeth 2011-08-30 11:01:49 +02:00
parent de4f8c8605
commit 6fe808338c

View File

@ -1,6 +1,5 @@
# Maildir folder support # Maildir folder support
# Copyright (C) 2002 - 2007 John Goerzen # Copyright (C) 2002 - 2011 John Goerzen & contributors
# <jgoerzen@complete.org>
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -35,8 +34,10 @@ except NameError:
from offlineimap import OfflineImapError from offlineimap import OfflineImapError
uidmatchre = re.compile(',U=(\d+)') # Find the UID in a message filename
timestampmatchre = re.compile('(\d+)'); re_uidmatch = re.compile(',U=(\d+)')
# Find a numeric timestamp in a string (filename prefix)
re_timestampmatch = re.compile('(\d+)');
timeseq = 0 timeseq = 0
lasttime = long(0) lasttime = long(0)
@ -67,11 +68,14 @@ class MaildirFolder(BaseFolder):
# check if we should use a different infosep to support Win file systems # check if we should use a different infosep to support Win file systems
self.wincompatible = self.config.getdefaultboolean( self.wincompatible = self.config.getdefaultboolean(
"Account "+self.accountname, "maildir-windows-compatible", False) "Account "+self.accountname, "maildir-windows-compatible", False)
self.infosep = '!' if self.wincompatible else ':' self.infosep = '!' if self.wincompatible else ':'
"""infosep is the separator between maildir name and flag appendix""" """infosep is the separator between maildir name and flag appendix"""
self.flagmatchre = re.compile('(%s2,)(\w*)' % self.infosep) self.re_flagmatch = re.compile('%s2,(\w*)' % self.infosep)
#self.ui is set in BaseFolder.init() #self.ui is set in BaseFolder.init()
# Everything up to the first comma or colon (or ! if Windows):
self.re_prefixmatch = re.compile('([^'+ self.infosep + ',]*)')
# Folder's MD5, so we can match it against the MD5 recorded in file names for validity
self._foldermd5 = md5(self.getvisiblename()).hexdigest()
# Cache the full folder path, as we use getfullname() very often # Cache the full folder path, as we use getfullname() very often
self._fullname = os.path.join(self.getroot(), self.getname()) self._fullname = os.path.join(self.getroot(), self.getname())
@ -97,7 +101,7 @@ class MaildirFolder(BaseFolder):
+ oldest_time_struct[5]) + oldest_time_struct[5])
oldest_time_utc -= oldest_time_today_seconds oldest_time_utc -= oldest_time_today_seconds
timestampmatch = timestampmatchre.search(messagename) timestampmatch = re_timestampmatch.search(messagename)
timestampstr = timestampmatch.group() timestampstr = timestampmatch.group()
timestamplong = long(timestampstr) timestamplong = long(timestampstr)
if(timestamplong < oldest_time_utc): if(timestamplong < oldest_time_utc):
@ -105,68 +109,80 @@ class MaildirFolder(BaseFolder):
else: else:
return True return True
def _parse_filename(self, filename):
    """Split a Maildir message file name into its components.

    Receives the file name (without path) of a message. The usual
    format is '<%d_%d.%d.%s>,U=<%d>,FMD5=<%s>:2,<FLAGS>' (pointy
    brackets denoting the various components).

    UID and FMD5 are only reported when the file name carries the
    FMD5 of the current folder. Otherwise the message is treated as
    foreign (new) to this folder and `None` is returned for both.
    If the folder matches but no UID can be detected, `None` is
    returned for the UID only. If flags are empty or cannot be
    detected, an empty set is returned.

    :param filename: message file name, without any directory part.
    :returns: (prefix, UID, FMD5, flags). prefix is the first group
        matched by self.re_prefixmatch at the start of the name (or
        None if it does not match). UID is a numeric "long" type.
        FMD5 is the value of self._foldermd5. flags is a set() of
        the characters found in the Maildir flags section."""
    prefix, uid, fmd5, flags = None, None, None, set()

    prefixmatch = self.re_prefixmatch.match(filename)
    if prefixmatch:
        prefix = prefixmatch.group(1)

    # If there is no folder MD5 in the name, or if it mismatches, assume
    # it is a foreign (new) message: uid and fmd5 both stay None.
    folderstr = ',FMD5=%s' % self._foldermd5
    if folderstr in filename:
        # The recorded FMD5 equals ours, so report it. Previously fmd5
        # was never assigned and callers always received None.
        fmd5 = self._foldermd5
        uidmatch = re_uidmatch.search(filename)
        if uidmatch:
            uid = long(uidmatch.group(1))

    # Flags do not depend on the folder; parse them for every message.
    flagmatch = self.re_flagmatch.search(filename)
    if flagmatch:
        flags = set(flagmatch.group(1))

    return prefix, uid, fmd5, flags
def _scanfolder(self): def _scanfolder(self):
"""Cache the message list. Maildir flags are: """Cache the message list from a Maildir.
R (replied)
S (seen) Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F
T (trashed) (flagged).
D (draft) :returns: dict that can be used as self.messagelist"""
F (flagged) maxage = self.config.getdefaultint("Account " + self.accountname,
and must occur in ASCII order.""" "maxage", None)
maxsize = self.config.getdefaultint("Account " + self.accountname,
"maxsize", None)
retval = {} retval = {}
files = [] files = []
nouidcounter = -1 # Messages without UIDs get nouidcounter = -1 # Messages without UIDs get negative UIDs.
# negative UID numbers.
foldermd5 = md5(self.getvisiblename()).hexdigest()
folderstr = ',FMD5=' + foldermd5
for dirannex in ['new', 'cur']: for dirannex in ['new', 'cur']:
fulldirname = os.path.join(self.getfullname(), dirannex) fulldirname = os.path.join(self.getfullname(), dirannex)
files.extend(os.path.join(dirannex, filename) for files.extend((dirannex, filename) for
filename in os.listdir(fulldirname)) filename in os.listdir(fulldirname))
for file in files:
messagename = os.path.basename(file)
#check if there is a parameter for maxage / maxsize - then see if this for dirannex, filename in files:
#message should be considered or not # We store just dirannex and filename, ie 'cur/123...'
maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) filepath = os.path.join(dirannex, filename)
maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) # check maxage/maxsize if this message should be considered
if maxage and not self._iswithinmaxage(filename, maxage):
continue
if maxsize and (os.path.getsize(os.path.join(
self.getfullname(), filepath)) > maxsize):
continue
if(maxage != -1): (prefix, uid, fmd5, flags) = self._parse_filename(filename)
isnewenough = self._iswithinmaxage(messagename, maxage) if uid is None: # assign negative uid to upload it.
if(isnewenough != True):
#this message is older than we should consider....
continue
#Check and see if the message is too big if the maxsize for this account is set
if(maxsize != -1):
size = os.path.getsize(os.path.join(self.getfullname(), file))
if(size > maxsize):
continue
foldermatch = messagename.find(folderstr) != -1
if not foldermatch:
# If there is no folder MD5 specified, or if it mismatches,
# assume it is a foreign (new) message and generate a
# negative uid for it
uid = nouidcounter uid = nouidcounter
nouidcounter -= 1 nouidcounter -= 1
else: # It comes from our folder. else: # It comes from our folder.
uidmatch = uidmatchre.search(messagename) uidmatch = re_uidmatch.search(filename)
uid = None uid = None
if not uidmatch: if not uidmatch:
uid = nouidcounter uid = nouidcounter
nouidcounter -= 1 nouidcounter -= 1
else: else:
uid = long(uidmatch.group(1)) uid = long(uidmatch.group(1))
#identify flags in the path name
flagmatch = self.flagmatchre.search(messagename)
if flagmatch:
flags = set(flagmatch.group(2))
else:
flags = set()
# 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S
retval[uid] = {'flags': flags, 'filename': file} retval[uid] = {'flags': flags, 'filename': filepath}
return retval return retval
def quickchanged(self, statusfolder): def quickchanged(self, statusfolder):