Merge pull request #491 from Unrud/uploadall

Optimize upload of whole collections
Guillaume Ayoub 2016-08-26 22:34:46 +02:00 committed by GitHub
commit a3dcfaacd6


@@ -323,19 +323,6 @@ class BaseCollection:
         """Upload a new item."""
         raise NotImplementedError
 
-    def upload_all(self, vobject_items):
-        """Upload a new set of items.
-
-        This takes a mapping of href and vobject items and
-        returns a list of uploaded items.
-
-        Might bring optimizations on some storages.
-
-        """
-        return [
-            self.upload(href, vobject_item)
-            for href, vobject_item in vobject_items.items()
-        ]
 
     def update(self, href, vobject_item):
         """Update an item.
@@ -428,11 +415,12 @@ class Collection(BaseCollection):
             raise
         self._sync_directory(directory)
 
-    def _find_available_file_name(self):
+    @staticmethod
+    def _find_available_file_name(exists_fn):
         # Prevent infinite loop
         for _ in range(10000):
             file_name = hex(getrandbits(32))[2:]
-            if not self.has(file_name):
+            if not exists_fn(file_name):
                 return file_name
         raise FileExistsError(errno.EEXIST, "No usable file name found")
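With the helper turned into a staticmethod, the existence check is supplied by the caller instead of being hard-wired to self.has. A minimal usage sketch, not part of the commit (the pending dict and its payload are hypothetical), showing that any callable returning a truthy value for taken hrefs works as exists_fn:

    # exists_fn is any callable that is truthy for hrefs already in use;
    # a plain dict's .get qualifies because it returns None for free names.
    pending = {}
    href = Collection._find_available_file_name(pending.get)
    pending[href] = "some item"  # hypothetical payload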
@@ -569,26 +557,20 @@ class Collection(BaseCollection):
                     items.extend(
                         getattr(collection, "%s_list" % content, []))
                 items_by_uid = groupby(sorted(items, key=get_uid), get_uid)
-                collections = {}
+                vobject_items = {}
                 for uid, items in items_by_uid:
                     new_collection = vobject.iCalendar()
                     for item in items:
                         new_collection.add(item)
-
-                    # Prevent infinite loop
-                    for _ in range(10000):
-                        href = self._find_available_file_name()
-                        if href not in collections:
-                            break
-                    else:
-                        raise FileExistsError(
-                            errno.EEXIST, "No usable file name found")
-                    collections[href] = new_collection
-                self.upload_all(collections)
+                    href = self._find_available_file_name(vobject_items.get)
+                    vobject_items[href] = new_collection
+                self.upload_all_nonatomic(vobject_items)
             elif props.get("tag") == "VCARD":
+                vobject_items = {}
                 for card in collection:
-                    self.upload(self._find_available_file_name(), card)
+                    href = self._find_available_file_name(vobject_items.get)
+                    vobject_items[href] = card
+                self.upload_all_nonatomic(vobject_items)
 
         # This operation is not atomic on the filesystem level but it's
         # very unlikely that one rename operations succeeds while the
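In both branches the callers pass vobject_items.get as the existence check: dict.get returns None for hrefs that are not yet in the batch, so the helper keeps drawing random names until it finds one unused within the mapping being built, before anything touches the filesystem. Illustrative only (the hrefs and the stand-in value are hypothetical):

    vobject_items = {"abc123": object()}      # stands in for a vobject component
    bool(vobject_items.get("abc123"))         # True  -> href taken in this batch
    bool(vobject_items.get("deadbeef"))       # False -> href is free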
@@ -600,6 +582,30 @@ class Collection(BaseCollection):
         return cls(sane_path, principal=principal)
 
+    def upload_all_nonatomic(self, vobject_items):
+        """Upload a new set of items.
+
+        This takes a mapping of href and vobject items and
+        uploads them nonatomic and without existence checks.
+
+        """
+        fs = []
+        for href, item in vobject_items.items():
+            path = path_to_filesystem(self._filesystem_path, href)
+            fs.append(open(path, "w", encoding=self.encoding, newline=""))
+            fs[-1].write(item.serialize())
+        fsync_fn = lambda fd: None
+        if self.configuration.getboolean("storage", "fsync"):
+            if os.name == "posix" and hasattr(fcntl, "F_FULLFSYNC"):
+                fsync_fn = lambda fd: fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
+            else:
+                fsync_fn = os.fsync
+        # sync everything at once because it's slightly faster.
+        for f in fs:
+            fsync_fn(f.fileno())
+            f.close()
+        self._sync_directory(self._filesystem_path)
+
     @classmethod
     def move(cls, item, to_collection, to_href):
         os.replace(
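
Putting the pieces together, a hedged sketch of the new bulk import path (assumes an existing Collection instance named collection and a list parsed of already-parsed vobject components; both names are illustrative):

    vobject_items = {}
    for component in parsed:
        # pick an href that is unused within this batch
        href = collection._find_available_file_name(vobject_items.get)
        vobject_items[href] = component
    # write every file, fsync them in one pass, then sync the directory once
    collection.upload_all_nonatomic(vobject_items)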