Patch for maxage and maxsize options
Dear All, Attached is the patch that I have developed to provide maxage and maxsize options. You can thus sync only the last x days of messages and exclude large messages. All details in the attached git file. Regards, -Mike -- Attached file included as plaintext by Ecartis -- -- File: submit From 04fead2b46a79675a5b29de6f2b4088b9c9448e5 Mon Sep 17 00:00:00 2001 From: mike <mike@mikelaptop.(none)> Date: Sun, 16 Aug 2009 17:00:49 +0430 Subject: [PATCH] Patch to provide maxage and maxsize account options to exclude old/large messages This is designed to make offlineimap even better for low bandwidth connections. maxage allows you to specify a number of days and only messages within that range will be considered by offlineimap for the sync. This can be useful if you would like to start using offlineimap with a large existing account and do not want to import large archives of mail. maxsize allows you to specify the maximum size of a message to consider so that you can exclude messages with large attachments etc. In both cases the cachemessagelist function of the folder was modified to ignore messages that do not meet the criteria. If the criteria are not specified then the existing code will be executed the same as before. If a message does not meet the criteria it will be as though this message does not exist - offlineimap will completely ignore it. It will not have flags updated, it will not be deleted, it will not be considered at all. When operating against an IMAP repository a server side search function is used. This of course requires support for server side search. I have tested this with either option, no options etc. against IMAP, Maildir and Gmail. I have run variations of this patch here for the last 3 weeks or so syncing about 4 accounts normally.
This commit is contained in:
		 Mike Dawson
					Mike Dawson
				
			
				
					committed by
					
						 John Goerzen
						John Goerzen
					
				
			
			
				
	
			
			
			 John Goerzen
						John Goerzen
					
				
			
						parent
						
							312b91a1a5
						
					
				
				
					commit
					cde94e5047
				
			| @@ -195,6 +195,30 @@ remoterepository = RemoteExample | |||||||
| # You can also specify parameters to the commands | # You can also specify parameters to the commands | ||||||
| # presynchook = imapfilter -c someotherconfig.lua | # presynchook = imapfilter -c someotherconfig.lua | ||||||
|  |  | ||||||
|  | # If you have a limited amount of bandwidth available you can exclude larger | ||||||
|  | # messages (e.g. those with large attachments etc).  If you do this it | ||||||
|  | # will appear to offlineimap that these messages do not exist at all.  They | ||||||
|  | # will not be copied, have flags changed etc.  For this to work on an IMAP | ||||||
|  | # server the server must have server side search enabled.  This works with gmail | ||||||
|  | # and most imap servers (e.g. cyrus etc) | ||||||
|  | # The maximum size should be specified in bytes - e.g. 2000000 for approx 2MB | ||||||
|  |  | ||||||
|  | # maxsize = 2000000 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # When you are starting to sync an already existing account you can tell offlineimap | ||||||
|  | # to sync messages from only the last x days.  When you do this messages older than x | ||||||
|  | # days will be completely ignored.  This can be useful for importing existing accounts | ||||||
|  | # when you do not want to download large amounts of archive email. | ||||||
|  |  | ||||||
|  | # Messages older than maxage days will not be synced, their flags will | ||||||
|  | # not be changed, they will not be deleted etc.  For offlineimap it will be like these | ||||||
|  | # messages do not exist.  This will perform an IMAP search in the case of IMAP or Gmail | ||||||
|  | # and therefore requires that the server support server side searching.  This will | ||||||
|  | # calculate the earliest day that would be included in the search and include all  | ||||||
|  | # messages from that day until today.   e.g. maxage = 3 to sync only the last 3 days' mail | ||||||
|  |  | ||||||
|  | # maxage = 3 | ||||||
|  |  | ||||||
| [Repository LocalExample] | [Repository LocalExample] | ||||||
|  |  | ||||||
|   | |||||||
| @@ -182,16 +182,22 @@ class AccountSynchronizationMixin: | |||||||
|  |  | ||||||
|         # Connect to the local cache. |         # Connect to the local cache. | ||||||
|         self.statusrepos = offlineimap.repository.LocalStatus.LocalStatusRepository(self.getconf('localrepository'), self) |         self.statusrepos = offlineimap.repository.LocalStatus.LocalStatusRepository(self.getconf('localrepository'), self) | ||||||
|              |  | ||||||
|  |         #might need changes here to ensure that one account sync does not crash others... | ||||||
|         if not self.refreshperiod: |         if not self.refreshperiod: | ||||||
|  |              | ||||||
|             self.sync(siglistener) |             self.sync(siglistener) | ||||||
|             self.ui.acctdone(self.name) |             self.ui.acctdone(self.name) | ||||||
|  |  | ||||||
|             return |             return | ||||||
|  |  | ||||||
|  |  | ||||||
|         looping = 1 |         looping = 1 | ||||||
|         while looping: |         while looping: | ||||||
|             self.sync(siglistener) |             self.sync(siglistener) | ||||||
|             looping = self.sleeper(siglistener) != 2 |             looping = self.sleeper(siglistener) != 2 | ||||||
|         self.ui.acctdone(self.name) |             self.ui.acctdone(self.name) | ||||||
|  |  | ||||||
|  |  | ||||||
|     def getaccountmeta(self): |     def getaccountmeta(self): | ||||||
|         return os.path.join(self.metadatadir, 'Account-' + self.name) |         return os.path.join(self.metadatadir, 'Account-' + self.name) | ||||||
|   | |||||||
| @@ -23,6 +23,7 @@ from offlineimap.version import versionstr | |||||||
| import rfc822, time, string, random, binascii, re | import rfc822, time, string, random, binascii, re | ||||||
| from StringIO import StringIO | from StringIO import StringIO | ||||||
| from copy import copy | from copy import copy | ||||||
|  | import time | ||||||
|  |  | ||||||
|  |  | ||||||
| class IMAPFolder(BaseFolder): | class IMAPFolder(BaseFolder): | ||||||
| @@ -115,6 +116,7 @@ class IMAPFolder(BaseFolder): | |||||||
|  |  | ||||||
|         return False |         return False | ||||||
|  |  | ||||||
|  |     # TODO: Make this so that it can define a date that would be the oldest messages etc. | ||||||
|     def cachemessagelist(self): |     def cachemessagelist(self): | ||||||
|         imapobj = self.imapserver.acquireconnection() |         imapobj = self.imapserver.acquireconnection() | ||||||
|         self.messagelist = {} |         self.messagelist = {} | ||||||
| @@ -122,20 +124,63 @@ class IMAPFolder(BaseFolder): | |||||||
|         try: |         try: | ||||||
|             # Primes untagged_responses |             # Primes untagged_responses | ||||||
|             imapobj.select(self.getfullname(), readonly = 1, force = 1) |             imapobj.select(self.getfullname(), readonly = 1, force = 1) | ||||||
|             try: |  | ||||||
|                 # Some mail servers do not return an EXISTS response if |  | ||||||
|                 # the folder is empty. |  | ||||||
|                 maxmsgid = long(imapobj.untagged_responses['EXISTS'][0]) |  | ||||||
|             except KeyError: |  | ||||||
|                 return |  | ||||||
|             if maxmsgid < 1: |  | ||||||
|                 # No messages; return |  | ||||||
|                 return |  | ||||||
|  |  | ||||||
|  |             maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) | ||||||
|  |             maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) | ||||||
|  |  | ||||||
|  |             if (maxage != -1) | (maxsize != -1): | ||||||
|  |                 try: | ||||||
|  |                     search_condition = "("; | ||||||
|  |  | ||||||
|  |                     if(maxage != -1): | ||||||
|  |                         #find out what the oldest message is that we should look at | ||||||
|  |                         oldest_time_struct = time.gmtime(time.time() - (60*60*24*maxage)) | ||||||
|  |  | ||||||
|  |                         #format this manually - otherwise locales could cause problems | ||||||
|  |                         monthnames_standard = ["Jan", "Feb", "Mar", "Apr", "May", \ | ||||||
|  |                             "June", "July", "Aug", "Sep", "Oct", "Nov", "Dec"] | ||||||
|  |  | ||||||
|  |                         our_monthname = monthnames_standard[oldest_time_struct[1]-1] | ||||||
|  |                         daystr = "%(day)02d" % {'day' : oldest_time_struct[2]} | ||||||
|  |                         date_search_str = "SINCE " + daystr + "-" + our_monthname \ | ||||||
|  |                             + "-" + str(oldest_time_struct[0]) | ||||||
|  |  | ||||||
|  |                         search_condition += date_search_str | ||||||
|  |  | ||||||
|  |                     if(maxsize != -1): | ||||||
|  |                         if(maxage != 1): #There are two conditions - add a space | ||||||
|  |                             search_condition += " " | ||||||
|  |  | ||||||
|  |                         search_condition += "SMALLER " + self.config.getdefault("Account " + self.accountname, "maxsize", -1) | ||||||
|  |  | ||||||
|  |                     search_condition += ")" | ||||||
|  |                     searchresult = imapobj.search(None, search_condition) | ||||||
|  |  | ||||||
|  |                     #result would come back seperated by space - to change into a fetch | ||||||
|  |                     #statement we need to change space to comma | ||||||
|  |                     messagesToFetch = searchresult[1][0].replace(" ", ",") | ||||||
|  |                 except KeyError: | ||||||
|  |                     return | ||||||
|  |                 if len(messagesToFetch) < 1: | ||||||
|  |                     # No messages; return | ||||||
|  |                     return | ||||||
|  |             else: | ||||||
|  |                 try: | ||||||
|  |                     # Some mail servers do not return an EXISTS response if | ||||||
|  |                     # the folder is empty. | ||||||
|  |  | ||||||
|  |                     maxmsgid = long(imapobj.untagged_responses['EXISTS'][0]) | ||||||
|  |                     messagesToFetch = '1:%d' % maxmsgid; | ||||||
|  |                 except KeyError: | ||||||
|  |                     return | ||||||
|  |                 if maxmsgid < 1: | ||||||
|  |                     #no messages; return | ||||||
|  |                     return | ||||||
|             # Now, get the flags and UIDs for these. |             # Now, get the flags and UIDs for these. | ||||||
|             # We could conceivably get rid of maxmsgid and just say |             # We could conceivably get rid of maxmsgid and just say | ||||||
|             # '1:*' here. |             # '1:*' here. | ||||||
|             response = imapobj.fetch('1:%d' % maxmsgid, '(FLAGS UID INTERNALDATE)')[1] |  | ||||||
|  |             response = imapobj.fetch(messagesToFetch, '(FLAGS UID INTERNALDATE)')[1] | ||||||
|         finally: |         finally: | ||||||
|             self.imapserver.releaseconnection(imapobj) |             self.imapserver.releaseconnection(imapobj) | ||||||
|         for messagestr in response: |         for messagestr in response: | ||||||
|   | |||||||
| @@ -29,6 +29,7 @@ except ImportError: | |||||||
|  |  | ||||||
| uidmatchre = re.compile(',U=(\d+)') | uidmatchre = re.compile(',U=(\d+)') | ||||||
| flagmatchre = re.compile(':.*2,([A-Z]+)') | flagmatchre = re.compile(':.*2,([A-Z]+)') | ||||||
|  | timestampmatchre = re.compile('(\d+)'); | ||||||
|  |  | ||||||
| timeseq = 0 | timeseq = 0 | ||||||
| lasttime = long(0) | lasttime = long(0) | ||||||
| @@ -72,6 +73,28 @@ class MaildirFolder(BaseFolder): | |||||||
|         token.""" |         token.""" | ||||||
|         return 42 |         return 42 | ||||||
|  |  | ||||||
|  |     #Checks to see if the given message is within the maximum age according | ||||||
|  |     #to the maildir name which should begin with a timestamp | ||||||
|  |     def _iswithinmaxage(self, messagename, maxage): | ||||||
|  |         #In order to have the same behaviour as SINCE in an IMAP search | ||||||
|  |         #we must convert this to the oldest time and then strip off hrs/mins | ||||||
|  |         #from that day | ||||||
|  |         oldest_time_utc = time.time() - (60*60*24*maxage) | ||||||
|  |         oldest_time_struct = time.gmtime(oldest_time_utc) | ||||||
|  |         oldest_time_today_seconds = ((oldest_time_struct[3] * 3600) \ | ||||||
|  |             + (oldest_time_struct[4] * 60) \ | ||||||
|  |             + oldest_time_struct[5]) | ||||||
|  |         oldest_time_utc -= oldest_time_today_seconds | ||||||
|  |  | ||||||
|  |         timestampmatch = timestampmatchre.search(messagename) | ||||||
|  |         timestampstr = timestampmatch.group() | ||||||
|  |         timestamplong = long(timestampstr) | ||||||
|  |         if(timestamplong < oldest_time_utc): | ||||||
|  |             return False | ||||||
|  |         else: | ||||||
|  |             return True | ||||||
|  |  | ||||||
|  |  | ||||||
|     def _scanfolder(self): |     def _scanfolder(self): | ||||||
|         """Cache the message list.  Maildir flags are: |         """Cache the message list.  Maildir flags are: | ||||||
|         R (replied) |         R (replied) | ||||||
| @@ -92,6 +115,25 @@ class MaildirFolder(BaseFolder): | |||||||
|                          filename in os.listdir(fulldirname)) |                          filename in os.listdir(fulldirname)) | ||||||
|         for file in files: |         for file in files: | ||||||
|             messagename = os.path.basename(file) |             messagename = os.path.basename(file) | ||||||
|  |  | ||||||
|  |             #check if there is a parameter for maxage / maxsize - then see if this | ||||||
|  |             #message should be considered or not | ||||||
|  |             maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) | ||||||
|  |             maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) | ||||||
|  |  | ||||||
|  |             if(maxage != -1): | ||||||
|  |                 isnewenough = self._iswithinmaxage(messagename, maxage) | ||||||
|  |                 if(isnewenough != True): | ||||||
|  |                     #this message is older than we should consider.... | ||||||
|  |                     continue | ||||||
|  |  | ||||||
|  |             #Check and see if the message is too big if the maxsize for this account is set | ||||||
|  |             if(maxsize != -1): | ||||||
|  |                 filesize = os.path.getsize(file) | ||||||
|  |                 if(filesize > maxsize): | ||||||
|  |                     continue | ||||||
|  |              | ||||||
|  |  | ||||||
|             foldermatch = messagename.find(folderstr) != -1 |             foldermatch = messagename.find(folderstr) != -1 | ||||||
|             if not foldermatch: |             if not foldermatch: | ||||||
|                 # If there is no folder MD5 specified, or if it mismatches, |                 # If there is no folder MD5 specified, or if it mismatches, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user