Make Radicale fast (#569)
* Change get_multi to also return missing items. get_multi is not used anywhere and this makes it easier to use.
* Use get_multi for report requests.
* Add get_all to BaseCollection. This can be used for optimization on multifilesystem.
* Use an iterator for files.
* Remove unnecessary checks. This never happens and would be an error.
* Don't raise an exception when calling get with a colliding name. This behavior is wrong; it should be handled as if the file doesn't exist.
* Use get_all and get_multi to skip unnecessary checks. Collision checks are slow on big collections.
* Use exceptions instead of existence checks. It's a bit faster.
* Use os.scandir instead of os.listdir. It's faster and doesn't load all files at once.
* Cache metadata when the storage is read-only. Metadata is queried a lot during a request, and it's quite slow to load and parse the file every time.
* Cache the etag when the storage is read-only. The etag is calculated twice for GET requests on collections.
* Add a helper method for cleaning caches.
* Use item etags to calculate the collection etag. It's very slow and unnecessary to parse all files with VObject and serialize them again.
* Cache serialized collections in the file system. Serialization is very slow for big collections; this caches the result in a file.
* Add a helper function for prefilters. The simplify_prefilters function converts XML filters to a simple tag and time range, which can be easily matched against the tag and time range that find_tag_and_time_range extracts from vobject items. (The idea is sketched right after the commit metadata below.)
* Add the ability to cache the etag and serialization of an item. Parsing items with vobject is very slow and not required for many requests; caching can be used to speed it up.
* Cache metadata and serialization from items in the file system. Store the serialized text and the tag and time range from vobject items in the cache. The metadata is used for prefilters.
* Remove the cache for the serialization of collections.
* Serialize calendars without vobject. Merge the calendar components manually; this is much faster and requires less memory, and caching of the result is not required anymore.
* Allow pre_filtered_list to indicate that filters match. The storage backend can indicate that it evaluated the filters completely.
* Skip filtering with vobject if prefiltering is sufficient. ``simplify_prefilters`` indicates whether the simplified condition is identical to ``filters``. This is used in the multifilesystem backend to detect if prefiltering is sufficient.
* Make constants global.
* Use generator expressions.
* Only extract elements from inside VCALENDAR. This is unnecessary at the moment; the text representation should never contain anything but VCALENDAR.
* Improve comments.
* Restore backward compatibility.
* Small improvements for the fast backend.
This commit is contained in: parent 78a62aee86, commit 9ceae0a751
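The core trick behind the prefilter changes (see the simplify_prefilters bullet above) is reducing an XML filter to a (tag, start, end) triple and testing each item with a plain interval-overlap check, so most items never have to be parsed. A minimal sketch of that predicate, with illustrative names rather than Radicale's actual API:

    # An item whose component ``itag`` covers [istart, iend) matches a filter
    # for ``tag`` covering [start, end) iff the tags agree and the intervals
    # overlap.
    def prefilter_match(tag, start, end, itag, istart, iend):
        return tag == itag and istart < end and iend > start

    # A VEVENT spanning [10, 20) overlaps a VEVENT filter for [15, 30).
    assert prefilter_match("VEVENT", 15, 30, "VEVENT", 10, 20)
    # A tag mismatch never matches, regardless of the time range.
    assert not prefilter_match("VTODO", 0, 100, "VEVENT", 10, 20)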
--- a/radicale/storage.py
+++ b/radicale/storage.py
@@ -27,7 +27,6 @@ entry.
 import binascii
 import contextlib
-import datetime
 import errno
 import json
 import os
@@ -36,6 +35,7 @@ import posixpath
 import shlex
 import stat
 import subprocess
+import sys
 import threading
 import time
 from contextlib import contextmanager
@@ -47,6 +47,10 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
 
 import vobject
 
+if sys.version_info >= (3, 5):
+    # HACK: Avoid import cycle for Python < 3.5
+    from . import xmlutils
+
 if os.name == "nt":
     import ctypes
     import ctypes.wintypes
@@ -89,6 +93,10 @@ elif os.name == "posix":
 
 def load(configuration, logger):
     """Load the storage manager chosen in configuration."""
+    if sys.version_info < (3, 5):
+        # HACK: Avoid import cycle for Python < 3.5
+        global xmlutils
+        from . import xmlutils
     storage_type = configuration.get("storage", "type")
     if storage_type == "multifilesystem":
         collection_class = Collection
@@ -107,6 +115,27 @@ def load(configuration, logger):
     return CollectionCopy
 
 
+def scandir(path, only_dirs=False, only_files=False):
+    """Iterator for directory elements. (For compatibility with Python < 3.5)
+
+    ``only_dirs`` only return directories
+
+    ``only_files`` only return files
+
+    """
+    if sys.version_info >= (3, 5):
+        for entry in os.scandir(path):
+            if ((not only_files or entry.is_file()) and
+                    (not only_dirs or entry.is_dir())):
+                yield entry.name
+    else:
+        for name in os.listdir(path):
+            p = os.path.join(path, name)
+            if ((not only_files or os.path.isfile(p)) and
+                    (not only_dirs or os.path.isdir(p))):
+                yield name
+
+
 def get_etag(text):
     """Etag from collection or item.
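As a usage sketch (the path is a placeholder), the wrapper yields names lazily, so on Python >= 3.5 a huge collection is never materialized as one list:

    # Iterate the plain files of a directory without building a full listing.
    for name in scandir("/path/to/collection", only_files=True):
        print(name)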
@@ -183,8 +212,8 @@ def path_to_filesystem(root, *paths):
         safe_path = os.path.join(safe_path, part)
         # Check for conflicting files (e.g. case-insensitive file systems
         # or short names on Windows file systems)
-        if os.path.lexists(safe_path):
-            if part not in os.listdir(safe_path_parent):
+        if (os.path.lexists(safe_path) and
+                part not in scandir(safe_path_parent)):
             raise CollidingPathError(part)
     return safe_path
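The reasoning behind this check, as a standalone sketch (a hypothetical helper, not part of the module): on case-insensitive file systems or with Windows short names, a path may appear to exist even though no directory entry carries exactly that name, and comparing against the parent's real entries detects the collision.

    import os

    def has_collision(parent, part):
        # lexists() may be true although no entry is literally named ``part``
        # (case-insensitive lookup); the directory listing gives the truth.
        path = os.path.join(parent, part)
        return os.path.lexists(path) and part not in os.listdir(parent)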
@@ -214,19 +243,57 @@ class ComponentNotFoundError(ValueError):
 
 
 class Item:
-    def __init__(self, collection, item, href, last_modified=None):
+    def __init__(self, collection, item=None, href=None, last_modified=None,
+                 text=None, etag=None):
+        """Initialize an item.
+
+        ``collection`` the parent collection.
+
+        ``href`` the href of the item.
+
+        ``last_modified`` the HTTP-datetime of when the item was modified.
+
+        ``text`` the text representation of the item (optional if ``item`` is
+        set).
+
+        ``item`` the vobject item (optional if ``text`` is set).
+
+        ``etag`` the etag of the item (optional). See ``get_etag``.
+
+        """
+        if text is None and item is None:
+            raise ValueError("at least one of 'text' or 'item' must be set")
         self.collection = collection
-        self.item = item
         self.href = href
         self.last_modified = last_modified
+        self._text = text
+        self._item = item
+        self._etag = etag
 
     def __getattr__(self, attr):
         return getattr(self.item, attr)
 
+    def serialize(self):
+        if self._text is None:
+            self._text = self.item.serialize()
+        return self._text
+
+    @property
+    def item(self):
+        if self._item is None:
+            try:
+                self._item = vobject.readOne(self._text)
+            except Exception as e:
+                raise RuntimeError("Failed to parse item %r in %r" %
+                                   (self.href, self.collection.path)) from e
+        return self._item
+
     @property
     def etag(self):
         """Encoded as quoted-string (see RFC 2616)."""
-        return get_etag(self.serialize())
+        if self._etag is None:
+            self._etag = get_etag(self.serialize())
+        return self._etag
 
 
 class BaseCollection:
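The effect of the rewritten Item class: an instance built from raw text touches vobject only when a request actually needs the parsed component, and the etag is computed at most once. A usage sketch, where ``collection`` and ``ics_text`` are hypothetical placeholders:

    item = Item(collection, href="event.ics", text=ics_text)
    etag = item.etag       # serializes from the cached text and hashes once
    etag2 = item.etag      # returns the cached etag, no recomputation
    component = item.item  # only now is vobject.readOne() invoked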
@@ -331,21 +398,54 @@ class BaseCollection:
     def get_multi(self, hrefs):
-        """Fetch multiple items. Duplicate hrefs must be ignored."""
-        for href in set(hrefs):
-            yield self.get(href)
+        """Fetch multiple items. Duplicate hrefs must be ignored.
+
+        DEPRECATED: use ``get_multi2`` instead
+
+        """
+        return (self.get(href) for href in set(hrefs))
+
+    def get_multi2(self, hrefs):
+        """Fetch multiple items.
+
+        Functionally similar to ``get``, but might bring performance benefits
+        on some storages when used cleverly. It's not required to return the
+        requested items in the correct order. Duplicated hrefs can be ignored.
+
+        Returns tuples with the href and the item or None if the item doesn't
+        exist.
+
+        """
+        return ((href, self.get(href)) for href in hrefs)
+
+    def get_all(self):
+        """Fetch all items.
+
+        Functionally similar to ``get``, but might bring performance benefits
+        on some storages when used cleverly.
+
+        """
+        return map(self.get, self.list())
+
+    def get_all_filtered(self, filters):
+        """Fetch all items with optional filtering.
+
+        This can largely improve performance of reports depending on
+        the filters and this implementation.
+
+        Returns tuples in the form ``(item, filters_matched)``.
+        ``filters_matched`` is a bool that indicates if ``filters`` are fully
+        matched.
+
+        This returns all events by default
+        """
+        return ((item, False) for item in self.get_all())
 
     def pre_filtered_list(self, filters):
         """List collection items with optional pre filtering.
 
-        This could largely improve performance of reports depending on
-        the filters and this implementation.
-        This returns all event by default
+        DEPRECATED: use ``get_all_filtered`` instead
+
         """
-        return [self.get(href) for href in self.list()]
+        return self.get_all()
 
     def has(self, href):
         """Check if an item exists by its href.
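From a caller's point of view the new methods look like this (a sketch; ``collection`` and ``filters`` are placeholders):

    # Missing items come back as (href, None) instead of forcing one
    # existence check per name.
    for href, item in collection.get_multi2(["a.ics", "missing.ics"]):
        if item is None:
            print("not found:", href)

    # The backend may pre-evaluate filters; the flag says whether filtering
    # is already complete for that item.
    for item, filters_matched in collection.get_all_filtered(filters):
        if not filters_matched:
            pass  # the caller still runs the full XML filter match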
@@ -414,6 +514,8 @@ class Collection(BaseCollection):
         split_path = self.path.split("/")
         self.owner = split_path[0] if len(split_path) > 1 else None
         self.is_principal = principal
+        self._meta = None
+        self._etag = None
 
     @classmethod
     def _get_collection_root_folder(cls):
@@ -533,14 +635,12 @@
             for item in collection.list():
                 yield collection.get(item)
 
-        for href in os.listdir(filesystem_path):
+        for href in scandir(filesystem_path, only_dirs=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     cls.logger.debug("Skipping collection %r in %r", href,
                                      path)
                 continue
-            child_filesystem_path = path_to_filesystem(filesystem_path, href)
-            if os.path.isdir(child_filesystem_path):
-                child_path = posixpath.join(path, href)
-                child_principal = len(attributes) == 0
-                yield cls(child_path, child_principal)
+            child_path = posixpath.join(path, href)
+            child_principal = len(attributes) == 0
+            yield cls(child_path, child_principal)
@@ -724,7 +824,7 @@
         history_folder = os.path.join(self._filesystem_path,
                                       ".Radicale.cache", "history")
         try:
-            for href in os.listdir(history_folder):
+            for href in scandir(history_folder):
                 if not is_safe_filesystem_path_component(href):
                     continue
                 if os.path.isfile(os.path.join(self._filesystem_path, href)):
@@ -766,7 +866,7 @@
         token_name_hash = md5()
         # Find the history of all existing and deleted items
         for href, item in chain(
-                ((item.href, item) for item in self.pre_filtered_list(())),
+                ((item.href, item) for item in self.get_all()),
                 ((href, None) for href in self._get_deleted_history_hrefs())):
             history_etag = self._update_history_etag(href, item)
             state[href] = history_etag
@@ -835,43 +935,135 @@
         return token, changes
 
     def list(self):
-        for href in os.listdir(self._filesystem_path):
+        for href in scandir(self._filesystem_path, only_files=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     self.logger.debug(
                         "Skipping item %r in %r", href, self.path)
                 continue
-            path = os.path.join(self._filesystem_path, href)
-            if os.path.isfile(path):
-                yield href
+            yield href
 
-    def get(self, href):
-        if not href:
-            return None
-        if not is_safe_filesystem_path_component(href):
-            self.logger.debug("Can't translate name %r safely to filesystem "
-                              "in %r", href, self.path)
-            return None
-        path = path_to_filesystem(self._filesystem_path, href)
-        if not os.path.isfile(path):
-            return None
-        with open(path, encoding=self.encoding, newline="") as f:
-            text = f.read()
+    _item_cache_cleaned = False
+
+    def get(self, href, verify_href=True):
+        item, metadata = self._get_with_metadata(href, verify_href=verify_href)
+        return item
+
+    def _get_with_metadata(self, href, verify_href=True):
+        # Like ``get`` but additionally returns the following metadata:
+        # tag, start, end: see ``xmlutils.find_tag_and_time_range``
+        if verify_href:
+            try:
+                if not is_safe_filesystem_path_component(href):
+                    raise UnsafePathError(href)
+                path = path_to_filesystem(self._filesystem_path, href)
+            except ValueError as e:
+                self.logger.debug(
+                    "Can't translate name %r safely to filesystem in %r: %s",
+                    href, self.path, e, exc_info=True)
+                return None, None
+        else:
+            path = os.path.join(self._filesystem_path, href)
+        try:
+            with open(path, "rb") as f:
+                btext = f.read()
+        except (FileNotFoundError, IsADirectoryError):
+            return None, None
+        # The hash of the component in the file system. This is used to check
+        # if the entry in the cache is still valid.
+        input_hash = md5()
+        input_hash.update(btext)
+        input_hash = input_hash.hexdigest()
+        cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
+                                    "item")
+        try:
+            with open(os.path.join(cache_folder, href), "rb") as f:
+                cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
+        except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
+            if isinstance(e, (pickle.UnpicklingError, ValueError)):
+                self.logger.warning(
+                    "Failed to load item cache entry %r in %r: %s",
+                    href, self.path, e, exc_info=True)
+            cinput_hash = cetag = ctext = ctag = cstart = cend = None
+        vobject_item = None
+        if input_hash != cinput_hash:
+            vobject_item = Item(self, href=href,
+                                text=btext.decode(self.encoding)).item
+            # Serialize the object again, to normalize the text representation.
+            # The storage may have been edited externally.
+            ctext = vobject_item.serialize()
+            cetag = get_etag(ctext)
+            try:
+                ctag, cstart, cend = xmlutils.find_tag_and_time_range(
+                    vobject_item)
+            except Exception as e:
+                raise RuntimeError("Failed to find tag and time range of item "
+                                   "%r from %r: %s" % (href, self.path,
+                                                       e)) from e
+            self._makedirs_synced(cache_folder)
+            try:
+                # Race: Other processes might have created and locked the
+                # file.
+                with self._atomic_write(os.path.join(cache_folder, href),
+                                        "wb") as f:
+                    pickle.dump((input_hash, cetag, ctext,
+                                 ctag, cstart, cend), f)
+            except PermissionError:
+                pass
+            # Clean cache entries (max once per request)
+            # This happens once after new uploads, or if the data in the
+            # file system was edited externally.
+            if not self._item_cache_cleaned:
+                self._item_cache_cleaned = True
+                self._clean_cache(cache_folder, (
+                    href for href in scandir(cache_folder) if not
+                    os.path.isfile(os.path.join(self._filesystem_path, href))))
         last_modified = time.strftime(
             "%a, %d %b %Y %H:%M:%S GMT",
             time.gmtime(os.path.getmtime(path)))
-        try:
-            item = vobject.readOne(text)
-        except Exception as e:
-            raise RuntimeError("Failed to parse item %r in %r" %
-                               (href, self.path)) from e
-        return Item(self, item, href, last_modified)
+        return Item(self, href=href, last_modified=last_modified, etag=cetag,
+                    text=ctext, item=vobject_item), (ctag, cstart, cend)
+
+    def get_multi2(self, hrefs):
+        # It's faster to check for file name collisions here, because
+        # we only need to call os.listdir once.
+        files = None
+        for href in hrefs:
+            if files is None:
+                # List the directory after ``hrefs`` returned its first item;
+                # the iterator may be empty and then the for loop is never
+                # executed.
+                files = os.listdir(self._filesystem_path)
+            path = os.path.join(self._filesystem_path, href)
+            if (not is_safe_filesystem_path_component(href) or
+                    href not in files and os.path.lexists(path)):
+                self.logger.debug(
+                    "Can't translate name safely to filesystem: %r", href)
+                yield (href, None)
+            else:
+                yield (href, self.get(href, verify_href=False))
+
+    def get_all(self):
+        # We don't need to check for collisions, because the file names
+        # are from os.listdir.
+        return (self.get(href, verify_href=False) for href in self.list())
+
+    def get_all_filtered(self, filters):
+        tag, start, end, simple = xmlutils.simplify_prefilters(filters)
+        if not tag:
+            # no filter
+            yield from ((item, simple) for item in self.get_all())
+            return
+        for item, (itag, istart, iend) in (
+                self._get_with_metadata(href, verify_href=False)
+                for href in self.list()):
+            if tag == itag and istart < end and iend > start:
+                yield item, simple and (start <= istart or iend <= end)
 
     def upload(self, href, vobject_item):
         if not is_safe_filesystem_path_component(href):
             raise UnsafePathError(href)
         path = path_to_filesystem(self._filesystem_path, href)
-        item = Item(self, vobject_item, href)
+        item = Item(self, href=href, item=vobject_item)
         with self._atomic_write(path, newline="") as fd:
             fd.write(item.serialize())
         # Track the change
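The cache entry used above is just a pickled tuple keyed by the MD5 of the raw file, so staleness detection is one hash comparison. The same pattern as a standalone sketch (paths and names are illustrative; Radicale additionally writes atomically and under its storage lock):

    import os
    import pickle
    from hashlib import md5

    def load_or_rebuild(cache_path, btext, rebuild):
        # Return the cached payload for the raw bytes ``btext``, rebuilding
        # it when stale. ``rebuild`` returns an iterable of values to cache.
        input_hash = md5(btext).hexdigest()
        try:
            with open(cache_path, "rb") as f:
                cached_hash, *payload = pickle.load(f)
            if cached_hash == input_hash:
                return payload
        except (FileNotFoundError, pickle.UnpicklingError, ValueError):
            pass  # missing or corrupt entry: fall through and rebuild
        payload = rebuild(btext)
        os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
        with open(cache_path, "wb") as f:
            pickle.dump((input_hash, *payload), f)
        return payload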
@@ -907,57 +1099,101 @@
             self._clean_history_cache()
 
     def get_meta(self, key=None):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                meta = json.load(f)
-                return meta.get(key) if key else meta
+        # reuse cached value if the storage is read-only
+        if self._writer or self._meta is None:
+            try:
+                with open(self._props_path, encoding=self.encoding) as f:
+                    self._meta = json.load(f)
+            except FileNotFoundError:
+                self._meta = {}
+            except ValueError as e:
+                raise RuntimeError("Failed to load properties of collect"
+                                   "ion %r: %s" % (self.path, e)) from e
+        return self._meta.get(key) if key else self._meta
 
     def set_meta(self, props):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                old_props = json.load(f)
-                old_props.update(props)
-                props = old_props
-        props = {key: value for key, value in props.items() if value}
-        with self._atomic_write(self._props_path, "w+") as f:
-            json.dump(props, f)
+        new_props = self.get_meta()
+        new_props.update(props)
+        for key in tuple(new_props.keys()):
+            if not new_props[key]:
+                del new_props[key]
+        with self._atomic_write(self._props_path, "w") as f:
+            json.dump(new_props, f)
 
     @property
     def last_modified(self):
-        relevant_files = [self._filesystem_path] + [
-            path_to_filesystem(self._filesystem_path, href)
-            for href in self.list()]
-        if os.path.exists(self._props_path):
-            relevant_files.append(self._props_path)
+        relevant_files = chain(
+            (self._filesystem_path,),
+            (self._props_path,) if os.path.exists(self._props_path) else (),
+            (os.path.join(self._filesystem_path, h) for h in self.list()))
         last = max(map(os.path.getmtime, relevant_files))
         return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))
 
     def serialize(self):
-        items = []
-        time_begin = datetime.datetime.now()
-        for href in self.list():
-            items.append(self.get(href).item)
-        time_end = datetime.datetime.now()
-        self.logger.info(
-            "Read %d items in %.3f seconds from %r", len(items),
-            (time_end - time_begin).total_seconds(), self.path)
         # serialize collection
         if self.get_meta("tag") == "VCALENDAR":
-            collection = vobject.iCalendar()
-            for item in items:
-                for content in ("vevent", "vtodo", "vjournal"):
-                    if content in item.contents:
-                        for item_part in getattr(item, "%s_list" % content):
-                            collection.add(item_part)
-                        break
-            return collection.serialize()
+            in_vcalendar = False
+            vtimezones = ""
+            included_tzids = set()
+            vtimezone = []
+            tzid = None
+            components = ""
+            # Concatenate all child elements of VCALENDAR from all items
+            # together, while preventing duplicated VTIMEZONE entries.
+            # VTIMEZONEs are only distinguished by their TZID; if different
+            # timezones share the same TZID this produces erroneous output.
+            # VObject fails at this too.
+            for item in self.get_all():
+                depth = 0
+                for line in item.serialize().split("\r\n"):
+                    if line.startswith("BEGIN:"):
+                        depth += 1
+                    if depth == 1 and line == "BEGIN:VCALENDAR":
+                        in_vcalendar = True
+                    elif in_vcalendar:
+                        if depth == 1 and line.startswith("END:"):
+                            in_vcalendar = False
+                        if depth == 2 and line == "BEGIN:VTIMEZONE":
+                            vtimezone.append(line)
+                        elif vtimezone:
+                            vtimezone.append(line)
+                            if depth == 2 and line.startswith("TZID:"):
+                                tzid = line[len("TZID:"):]
+                            elif depth == 2 and line.startswith("END:"):
+                                if tzid is None or tzid not in included_tzids:
+                                    if vtimezones:
+                                        vtimezones += "\r\n"
+                                    vtimezones += "\r\n".join(vtimezone)
+                                    included_tzids.add(tzid)
+                                vtimezone.clear()
+                                tzid = None
+                        elif depth >= 2:
+                            if components:
+                                components += "\r\n"
+                            components += line
+                    if line.startswith("END:"):
+                        depth -= 1
+            return "\r\n".join(filter(bool, (
+                "BEGIN:VCALENDAR",
+                "VERSION:2.0",
+                "PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
+                vtimezones,
+                components,
+                "END:VCALENDAR")))
         elif self.get_meta("tag") == "VADDRESSBOOK":
-            return "".join([item.serialize() for item in items])
+            return "".join((item.serialize() for item in self.get_all()))
         return ""
 
+    @property
+    def etag(self):
+        # reuse cached value if the storage is read-only
+        if self._writer or self._etag is None:
+            etag = md5()
+            for item in self.get_all():
+                etag.update((item.href + "/" + item.etag).encode("utf-8"))
+            self._etag = '"%s"' % etag.hexdigest()
+        return self._etag
+
     _lock = threading.Lock()
     _waiters = []
     _lock_file = None
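The collection etag above changes whenever any member's href or etag changes, and no item has to be parsed with vobject; conceptually:

    from hashlib import md5

    def collection_etag(items):
        # ``items`` yields objects with ``href`` and ``etag`` attributes.
        etag = md5()
        for item in items:
            etag.update((item.href + "/" + item.etag).encode("utf-8"))
        return '"%s"' % etag.hexdigest()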
--- a/radicale/xmlutils.py
+++ b/radicale/xmlutils.py
@@ -26,12 +26,14 @@ in them for XML requests (all but PUT).
 """
 
 import copy
+import math
 import posixpath
 import re
 import xml.etree.ElementTree as ET
 from collections import OrderedDict
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
 from http import client
 from itertools import chain
 from urllib.parse import quote, unquote, urlparse
 
 from . import storage
@@ -56,6 +58,13 @@ for short, url in NAMESPACES.items():
 CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
 HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)
 
+DAY = timedelta(days=1)
+SECOND = timedelta(seconds=1)
+DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
+DATETIME_MAX = datetime.max.replace(tzinfo=timezone.utc)
+TIMESTAMP_MIN = math.floor(DATETIME_MIN.timestamp())
+TIMESTAMP_MAX = math.ceil(DATETIME_MAX.timestamp())
+
 
 def pretty_xml(element, level=0):
     """Indent an ElementTree ``element`` and its children."""
@@ -210,11 +219,9 @@ def _prop_match(item, filter_):
 
 
 def _time_range_match(vobject_item, filter_, child_name):
-    """Check whether the ``item`` matches the time-range ``filter_``.
-
-    See rfc4791-9.9.
-
-    """
+    """Check whether the component/property ``child_name`` of
+    ``vobject_item`` matches the time-range ``filter_``."""
     start = filter_.get("start")
     end = filter_.get("end")
     if not start and not end:
@@ -229,14 +236,53 @@ def _time_range_match(vobject_item, filter_, child_name):
         end = datetime.max
     start = start.replace(tzinfo=timezone.utc)
     end = end.replace(tzinfo=timezone.utc)
-    child = getattr(vobject_item, child_name.lower())
+
+    matched = False
+
+    def range_fn(range_start, range_end):
+        nonlocal matched
+        if start < range_end and range_start < end:
+            matched = True
+            return True
+        if end < range_start:
+            return True
+        return False
+
+    def infinity_fn(start):
+        return False
+
+    _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
+    return matched
+
+
+def _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn):
+    """Visit all time ranges in the component/property ``child_name`` of
+    ``vobject_item`` with visitors ``range_fn`` and ``infinity_fn``.
+
+    ``range_fn`` gets called for every time_range with ``start`` and ``end``
+    datetimes as arguments. If the function returns True, the operation is
+    cancelled.
+
+    ``infinity_fn`` gets called when an infinite recurrence rule is detected
+    with ``start`` datetime as argument. If the function returns True, the
+    operation is cancelled.
+
+    See rfc4791-9.9.
+
+    """
+    child = getattr(vobject_item, child_name.lower())
     # Comments give the lines in the tables of the specification
     if child_name == "VEVENT":
         # TODO: check if there's a timezone
         dtstart = child.dtstart.value
 
         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for dtstart in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(dtstart)):
+                        return
+                    break
             dtstarts = child.getrruleset(addRDate=True)
         else:
             dtstarts = (dtstart,)
@@ -255,31 +301,30 @@ def _time_range_match(vobject_item, filter_, child_name):
             dtstart_is_datetime = isinstance(dtstart, datetime)
             dtstart = _date_to_datetime(dtstart)
 
-            if dtstart > end:
-                break
-
             if dtend is not None:
                 # Line 1
                 dtend = dtstart + timedelta(seconds=original_duration)
-                if start < dtend and end > dtstart:
-                    return True
+                if range_fn(dtstart, dtend):
+                    return
             elif duration is not None:
                 if original_duration is None:
                     original_duration = duration.seconds
                 if duration.seconds > 0:
                     # Line 2
-                    if start < dtstart + duration and end > dtstart:
-                        return True
-                elif start <= dtstart and end > dtstart:
+                    if range_fn(dtstart, dtstart + duration):
+                        return
+                else:
                     # Line 3
-                    return True
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
             elif dtstart_is_datetime:
                 # Line 4
-                if start <= dtstart and end > dtstart:
-                    return True
-            elif start < dtstart + timedelta(days=1) and end > dtstart:
+                if range_fn(dtstart, dtstart + SECOND):
+                    return
+            else:
                 # Line 5
-                return True
+                if range_fn(dtstart, dtstart + DAY):
+                    return
 
     elif child_name == "VTODO":
         dtstart = getattr(child, "dtstart", None)
@@ -305,6 +350,12 @@ def _time_range_match(vobject_item, filter_, child_name):
             created = _date_to_datetime(created.value)
 
         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for reference_date in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(reference_date)):
+                        return
+                    break
             reference_dates = child.getrruleset(addRDate=True)
         else:
             if dtstart is not None:
@@ -317,47 +368,56 @@ def _time_range_match(vobject_item, filter_, child_name):
                 reference_dates = (created,)
             else:
                 # Line 8
-                return True
+                if range_fn(DATETIME_MIN, DATETIME_MAX):
+                    return
+                reference_dates = ()
 
         for reference_date in reference_dates:
             reference_date = _date_to_datetime(reference_date)
-            if reference_date > end:
-                break
 
             if dtstart is not None and duration is not None:
                 # Line 1
-                if start <= reference_date + duration and (
-                        end > reference_date or
-                        end >= reference_date + duration):
-                    return True
+                if range_fn(reference_date,
+                            reference_date + duration + SECOND):
+                    return
+                if range_fn(reference_date + duration - SECOND,
+                            reference_date + duration + SECOND):
+                    return
             elif dtstart is not None and due is not None:
                 # Line 2
                 due = reference_date + timedelta(seconds=original_duration)
-                if (start < due or start <= reference_date) and (
-                        end > reference_date or end >= due):
-                    return True
+                if (range_fn(reference_date, due) or
+                        range_fn(reference_date, reference_date + SECOND) or
+                        range_fn(due - SECOND, due) or
+                        range_fn(due - SECOND, reference_date + SECOND)):
+                    return
             elif dtstart is not None:
-                if start <= reference_date and end > reference_date:
-                    return True
+                if range_fn(reference_date, reference_date + SECOND):
+                    return
             elif due is not None:
                 # Line 4
-                if start < reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND, reference_date):
+                    return
             elif completed is not None and created is not None:
                 # Line 5
                 completed = reference_date + timedelta(
                     seconds=original_duration)
-                if (start <= reference_date or start <= completed) and (
-                        end >= reference_date or end >= completed):
-                    return True
+                if (range_fn(reference_date - SECOND,
+                             reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND) or
+                        range_fn(reference_date - SECOND,
+                                 reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND)):
+                    return
             elif completed is not None:
                 # Line 6
-                if start <= reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND,
+                            reference_date + SECOND):
+                    return
             elif created is not None:
                 # Line 7
-                if end > reference_date:
-                    return True
+                if range_fn(reference_date, DATETIME_MAX):
+                    return
 
     elif child_name == "VJOURNAL":
         dtstart = getattr(child, "dtstart", None)
@@ -365,6 +425,12 @@ def _time_range_match(vobject_item, filter_, child_name):
         if dtstart is not None:
             dtstart = dtstart.value
             if child.rruleset:
+                if (";UNTIL=" not in child.rrule.value and
+                        ";COUNT=" not in child.rrule.value):
+                    for dtstart in child.getrruleset(addRDate=True):
+                        if infinity_fn(_date_to_datetime(dtstart)):
+                            return
+                        break
                 dtstarts = child.getrruleset(addRDate=True)
             else:
                 dtstarts = (dtstart,)
@@ -373,18 +439,21 @@ def _time_range_match(vobject_item, filter_, child_name):
                 dtstart_is_datetime = isinstance(dtstart, datetime)
                 dtstart = _date_to_datetime(dtstart)
 
-                if dtstart > end:
-                    break
-
                 if dtstart_is_datetime:
                     # Line 1
-                    if start <= dtstart and end > dtstart:
-                        return True
-                elif start < dtstart + timedelta(days=1) and end > dtstart:
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
+                else:
                     # Line 2
-                    return True
+                    if range_fn(dtstart, dtstart + DAY):
+                        return
 
-    return False
+    elif isinstance(child, date):
+        if range_fn(child, child + DAY):
+            return
+    elif isinstance(child, datetime):
+        if range_fn(child, child + SECOND):
+            return
 
 
 def _text_match(vobject_item, filter_, child_name, attrib_name=None):
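Separating the RFC 4791 time-range tables from their callers via visitors means other consumers can reuse the traversal. For example, a hypothetical caller could collect every occurrence interval instead of stopping at the first overlap (assuming the module's ``_visit_time_ranges`` above):

    def collect_ranges(vobject_item):
        ranges = []

        def range_fn(range_start, range_end):
            ranges.append((range_start, range_end))
            return False  # never cancel: visit every range

        def infinity_fn(start):
            return True  # infinite recurrence: stop expanding occurrences

        _visit_time_ranges(vobject_item, "VEVENT", range_fn, infinity_fn)
        return ranges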
@@ -429,6 +498,99 @@ def _param_filter_match(vobject_item, filter_, parent_name):
     return condition
 
 
+def simplify_prefilters(filters):
+    """Creates a simplified condition from ``filters``.
+
+    Returns a tuple (``tag``, ``start``, ``end``, ``simple``) where ``tag`` is
+    a string or None (match all) and ``start`` and ``end`` are POSIX
+    timestamps (as int). ``simple`` is a bool that indicates that ``filters``
+    and the simplified condition are identical.
+
+    """
+    flat_filters = tuple(chain.from_iterable(filters))
+    simple = len(flat_filters) <= 1
+    for col_filter in flat_filters:
+        if (col_filter.tag != _tag("C", "comp-filter") or
+                col_filter.get("name") != "VCALENDAR"):
+            simple = False
+            continue
+        simple &= len(col_filter) <= 1
+        for comp_filter in col_filter:
+            if comp_filter.tag != _tag("C", "comp-filter"):
+                simple = False
+                continue
+            tag = comp_filter.get("name")
+            if (tag not in ("VTODO", "VEVENT", "VJOURNAL") or comp_filter.find(
+                    _tag("C", "is-not-defined")) is not None):
+                simple = False
+                continue
+            simple &= len(comp_filter) <= 1
+            for time_filter in comp_filter:
+                if time_filter.tag != _tag("C", "time-range"):
+                    simple = False
+                    continue
+                start = time_filter.get("start")
+                end = time_filter.get("end")
+                if start:
+                    start = math.floor(datetime.strptime(
+                        start, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    start = TIMESTAMP_MIN
+                if end:
+                    end = math.ceil(datetime.strptime(
+                        end, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    end = TIMESTAMP_MAX
+                return tag, start, end, simple
+            return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+    return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+
+
+def find_tag_and_time_range(vobject_item):
+    """Find tag and enclosing time range from ``vobject_item``.
+
+    Returns a tuple (``tag``, ``start``, ``end``) where ``tag`` is a string
+    and ``start`` and ``end`` are POSIX timestamps (as int).
+
+    This is intended to be used for matching against simplified prefilters.
+
+    """
+    tag = ""
+    if vobject_item.name == "VCALENDAR":
+        for component in vobject_item.components():
+            if component.name in ("VTODO", "VEVENT", "VJOURNAL"):
+                tag = component.name
+                break
+    if not tag:
+        return (None, math.floor(DATETIME_MIN.timestamp()),
+                math.ceil(DATETIME_MAX.timestamp()))
+    start = end = None
+
+    def range_fn(range_start, range_end):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        if end is None or end < range_end:
+            end = range_end
+        return False
+
+    def infinity_fn(range_start):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        end = DATETIME_MAX
+        return True
+
+    _visit_time_ranges(vobject_item, tag, range_fn, infinity_fn)
+    if start is None:
+        start = DATETIME_MIN
+    if end is None:
+        end = DATETIME_MAX
+    return tag, math.floor(start.timestamp()), math.ceil(end.timestamp())
+
+
 def name_from_path(path, collection):
     """Return Radicale item name from ``path``."""
     path = path.strip("/") + "/"
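For a concrete picture of the input, a CalDAV filter for VEVENTs in a UTC time window reduces to one tag and two timestamps, with ``simple`` True because the XML expresses nothing else. A sketch (assuming the module's ``simplify_prefilters`` and the CalDAV namespace used by its ``_tag`` helper):

    import xml.etree.ElementTree as ET

    CALDAV_NS = "urn:ietf:params:xml:ns:caldav"

    def _el(name, **attrib):
        return ET.Element("{%s}%s" % (CALDAV_NS, name), attrib)

    vcalendar = _el("comp-filter", name="VCALENDAR")
    vevent = _el("comp-filter", name="VEVENT")
    vevent.append(_el("time-range", start="20170601T000000Z",
                      end="20170701T000000Z"))
    vcalendar.append(vevent)

    tag, start, end, simple = simplify_prefilters([[vcalendar]])
    # tag == "VEVENT"; start/end are the POSIX timestamps of the window;
    # simple is True, so filtering with vobject can be skipped entirely.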
@@ -891,43 +1053,60 @@ def report(base_prefix, path, xml_request, collection):
             root.findall("./%s" % _tag("C", "filter")) +
             root.findall("./%s" % _tag("CR", "filter")))
 
+    def retrieve_items(collection, hreferences, multistatus):
+        """Retrieves all items that are referenced in ``hreferences`` from
+        ``collection`` and adds 404 responses for missing and invalid items
+        to ``multistatus``."""
+        collection_requested = False
+
+        def get_names():
+            """Extracts all names from references in ``hreferences`` and adds
+            404 responses for invalid references to ``multistatus``.
+            If the whole collection is referenced ``collection_requested``
+            gets set to ``True``."""
+            nonlocal collection_requested
+            for hreference in hreferences:
+                try:
+                    name = name_from_path(hreference, collection)
+                except ValueError as e:
-            collection.logger.warning("Skipping invalid path %r in REPORT "
-                                      "request on %r: %s", hreference, path, e)
+                    collection.logger.warning(
+                        "Skipping invalid path %r in REPORT request on %r: %s",
+                        hreference, path, e)
+                    response = _item_response(base_prefix, hreference,
+                                              found_item=False)
+                    multistatus.append(response)
+                    continue
+                if name:
+                    # Reference is an item
-            item = collection.get(name)
-            if not item:
-                response = _item_response(base_prefix, hreference,
-                                          found_item=False)
-                multistatus.append(response)
-                continue
-            items = [item]
+                    yield name
+                else:
+                    # Reference is a collection
-            items = collection.pre_filtered_list(filters)
+                    collection_requested = True
 
-        for item in items:
-            if filters:
-                try:
-                    match = (_comp_match
-                             if collection.get_meta("tag") == "VCALENDAR"
-                             else _prop_match)
+        for name, item in collection.get_multi2(get_names()):
+            if not item:
+                uri = "/" + posixpath.join(collection.path, name)
+                response = _item_response(base_prefix, uri,
+                                          found_item=False)
+                multistatus.append(response)
+            else:
+                yield item, False
+        if collection_requested:
+            yield from collection.get_all_filtered(filters)
+
+    for item, filters_matched in retrieve_items(collection, hreferences,
+                                                multistatus):
+        if filters and not filters_matched:
+            match = (
+                _comp_match if collection.get_meta("tag") == "VCALENDAR"
+                else _prop_match)
+            try:
                 if not all(match(item, filter_[0]) for filter_ in filters
                            if filter_):
                     continue
             except Exception as e:
                 raise RuntimeError("Failed to filter item %r from %r: %s" %
-                                   (collection.path, item.href, e)) from e
+                                   (item.href, collection.path, e)) from e
 
         found_props = []
         not_found_props = []