From 6fe808338c1826a66eb507830be023dc1db9978a Mon Sep 17 00:00:00 2001 From: Sebastian Spaeth Date: Tue, 30 Aug 2011 11:01:49 +0200 Subject: [PATCH] Refactor parsing out maildirs filename components Create a helper function that retrieves the UID, folder MD5, and Flags from a message filename. We need these items when we simply want to rename (=new UID) a Maildir message file later. The new function can give us these components. Rework, so we cache the calculation of the folder's md5 value once, it never changes and we call it a lot. Signed-off-by: Sebastian Spaeth --- offlineimap/folder/Maildir.py | 116 +++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index c9ea63d..dc2e368 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -1,6 +1,5 @@ # Maildir folder support -# Copyright (C) 2002 - 2007 John Goerzen -# +# Copyright (C) 2002 - 2011 John Goerzen & contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,8 +34,10 @@ except NameError: from offlineimap import OfflineImapError -uidmatchre = re.compile(',U=(\d+)') -timestampmatchre = re.compile('(\d+)'); +# Find the UID in a message filename +re_uidmatch = re.compile(',U=(\d+)') +# Find a numeric timestamp in a string (filename prefix) +re_timestampmatch = re.compile('(\d+)'); timeseq = 0 lasttime = long(0) @@ -67,11 +68,14 @@ class MaildirFolder(BaseFolder): # check if we should use a different infosep to support Win file systems self.wincompatible = self.config.getdefaultboolean( "Account "+self.accountname, "maildir-windows-compatible", False) - self.infosep = '!' if self.wincompatible else ':' """infosep is the separator between maildir name and flag appendix""" - self.flagmatchre = re.compile('(%s2,)(\w*)' % self.infosep) + self.re_flagmatch = re.compile('%s2,(\w*)' % self.infosep) #self.ui is set in BaseFolder.init() + # Everything up to the first comma or colon (or ! if Windows): + self.re_prefixmatch = re.compile('([^'+ self.infosep + ',]*)') + #folder's md, so we can match with recorded file md5 for validity + self._foldermd5 = md5(self.getvisiblename()).hexdigest() # Cache the full folder path, as we use getfullname() very often self._fullname = os.path.join(self.getroot(), self.getname()) @@ -97,7 +101,7 @@ class MaildirFolder(BaseFolder): + oldest_time_struct[5]) oldest_time_utc -= oldest_time_today_seconds - timestampmatch = timestampmatchre.search(messagename) + timestampmatch = re_timestampmatch.search(messagename) timestampstr = timestampmatch.group() timestamplong = long(timestampstr) if(timestamplong < oldest_time_utc): @@ -105,68 +109,80 @@ class MaildirFolder(BaseFolder): else: return True + def _parse_filename(self, filename): + """Returns a messages file name components + + Receives the file name (without path) of a msg. Usual format is + '<%d_%d.%d.%s>,U=<%d>,FMD5=<%s>:2,' (pointy brackets + denoting the various components). + + If FMD5 does not correspond with the current folder MD5, we will + return None for the UID & FMD5 (as it is not valid in this + folder). If UID or FMD5 can not be detected, we return `None` + for the respective element. If flags are empty or cannot be + detected, we return an empty flags list. + + :returns: (prefix, UID, FMD5, flags). UID is a numeric "long" + type. flags is a set() of Maildir flags""" + prefix, uid, fmd5, flags = None, None, None, set() + prefixmatch = self.re_prefixmatch.match(filename) + if prefixmatch: + prefix = prefixmatch.group(1) + folderstr = ',FMD5=%s' % self._foldermd5 + foldermatch = folderstr in filename + # If there was no folder MD5 specified, or if it mismatches, + # assume it is a foreign (new) message and ret: uid, fmd5 = None, None + if foldermatch: + uidmatch = re_uidmatch.search(filename) + if uidmatch: + uid = long(uidmatch.group(1)) + flagmatch = self.re_flagmatch.search(filename) + if flagmatch: + flags = set(flagmatch.group(1)) + return prefix, uid, fmd5, flags def _scanfolder(self): - """Cache the message list. Maildir flags are: - R (replied) - S (seen) - T (trashed) - D (draft) - F (flagged) - and must occur in ASCII order.""" + """Cache the message list from a Maildir. + + Maildir flags are: R (replied) S (seen) T (trashed) D (draft) F + (flagged). + :returns: dict that can be used as self.messagelist""" + maxage = self.config.getdefaultint("Account " + self.accountname, + "maxage", None) + maxsize = self.config.getdefaultint("Account " + self.accountname, + "maxsize", None) retval = {} files = [] - nouidcounter = -1 # Messages without UIDs get - # negative UID numbers. - foldermd5 = md5(self.getvisiblename()).hexdigest() - folderstr = ',FMD5=' + foldermd5 + nouidcounter = -1 # Messages without UIDs get negative UIDs. for dirannex in ['new', 'cur']: fulldirname = os.path.join(self.getfullname(), dirannex) - files.extend(os.path.join(dirannex, filename) for + files.extend((dirannex, filename) for filename in os.listdir(fulldirname)) - for file in files: - messagename = os.path.basename(file) - #check if there is a parameter for maxage / maxsize - then see if this - #message should be considered or not - maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) - maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) + for dirannex, filename in files: + # We store just dirannex and filename, ie 'cur/123...' + filepath = os.path.join(dirannex, filename) + # check maxage/maxsize if this message should be considered + if maxage and not self._iswithinmaxage(filename, maxage): + continue + if maxsize and (os.path.getsize(os.path.join( + self.getfullname(), filepath)) > maxsize): + continue - if(maxage != -1): - isnewenough = self._iswithinmaxage(messagename, maxage) - if(isnewenough != True): - #this message is older than we should consider.... - continue - - #Check and see if the message is too big if the maxsize for this account is set - if(maxsize != -1): - size = os.path.getsize(os.path.join(self.getfullname(), file)) - if(size > maxsize): - continue - - foldermatch = messagename.find(folderstr) != -1 - if not foldermatch: - # If there is no folder MD5 specified, or if it mismatches, - # assume it is a foreign (new) message and generate a - # negative uid for it + (prefix, uid, fmd5, flags) = self._parse_filename(filename) + if uid is None: # assign negative uid to upload it. uid = nouidcounter nouidcounter -= 1 else: # It comes from our folder. - uidmatch = uidmatchre.search(messagename) + uidmatch = re_uidmatch.search(filename) uid = None if not uidmatch: uid = nouidcounter nouidcounter -= 1 else: uid = long(uidmatch.group(1)) - #identify flags in the path name - flagmatch = self.flagmatchre.search(messagename) - if flagmatch: - flags = set(flagmatch.group(2)) - else: - flags = set() # 'filename' is 'dirannex/filename', e.g. cur/123,U=1,FMD5=1:2,S - retval[uid] = {'flags': flags, 'filename': file} + retval[uid] = {'flags': flags, 'filename': filepath} return retval def quickchanged(self, statusfolder):