Make Radicale fast (#569)

* Change get_multi to also return missing items

get_multi is not used anywhere yet, and this change makes it easier to use.
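
The final shape of this API in the PR is ``get_multi2``, which yields
``(href, item)`` pairs with ``None`` for missing items. A minimal usage
sketch (the helper name ``split_found`` is illustrative, not part of the
patch):

    def split_found(collection, hrefs):
        found, missing = [], []
        # get_multi2 yields (href, item) pairs; item is None when the
        # href is missing or can't be mapped to a file safely
        for href, item in collection.get_multi2(hrefs):
            (found if item is not None else missing).append(href)
        return found, missing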

* Use get_multi for report requests

* Add get_all to BaseCollection

This can be used for optimization on multifilesystem.

* Use iterator for files

* Remove unnecessary checks

This case never occurs and would indicate an error.

* Don't raise exception when calling get with colliding name

This behavior is wrong; the name should be handled as if the file doesn't exist.

* Use get_all and get_multi to skip unnecessary checks

Collision checks are slow on big collections.

* Use exception instead of existence checks

It's a bit faster.
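
Schematically, the difference looks like this (a sketch, not the actual
patch; the EAFP variant mirrors what the new ``_get_with_metadata`` does):

    import os

    def read_item_checked(path):
        # Before: an explicit existence check costs an extra stat() call
        # and is racy if the file disappears in between.
        if not os.path.isfile(path):
            return None
        with open(path, "rb") as f:
            return f.read()

    def read_item_eafp(path):
        # After: just try to open the file and handle the failure.
        try:
            with open(path, "rb") as f:
                return f.read()
        except (FileNotFoundError, IsADirectoryError):
            return None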

* Use os.scandir instead of os.listdir

It's faster and doesn't load all files at once.
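
A simplified version of the compatibility helper added in the diff below
(the real ``scandir`` wrapper also supports ``only_dirs`` and falls back to
os.listdir on Python < 3.5):

    import os

    def scandir_names(path, only_files=False):
        # os.scandir yields entries lazily and caches the file type,
        # usually avoiding an extra stat() call per entry
        for entry in os.scandir(path):
            if not only_files or entry.is_file():
                yield entry.name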

* Cache metadata when storage is read-only

Metadata is queried a lot during a request. It's quite slow to load and parse the file every time.
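
The caching pattern, reduced to its core (a sketch; ``_writer`` is True
while the storage is locked for writing, and ``_props_path`` stands in for
the collection's ``.Radicale.props`` file):

    import json

    class Collection:
        _meta = None     # per-request cache
        _writer = False  # True while the storage is locked for writing
        _props_path = ".Radicale.props"  # illustrative; really per-collection

        def get_meta(self, key=None):
            # Reload from disk only if the collection may change;
            # otherwise reuse the cached value.
            if self._writer or self._meta is None:
                try:
                    with open(self._props_path, encoding="utf-8") as f:
                        self._meta = json.load(f)
                except FileNotFoundError:
                    self._meta = {}
            return self._meta.get(key) if key else self._meta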

* Cache the etag when the storage is read-only

The etag is calculated twice for GET requests on collections.

* Add helper method for cleaning caches

* Use item etags to calculate collection etag

Parsing all files with VObject and serializing them again is very slow and unnecessary.
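
The new collection etag boils down to hashing each item's href and cached
etag (mirroring the ``etag`` property added in the diff below):

    from hashlib import md5

    def collection_etag(items):
        # items: iterable of objects with ``href`` and ``etag`` attributes
        etag = md5()
        for item in items:
            etag.update((item.href + "/" + item.etag).encode("utf-8"))
        return '"%s"' % etag.hexdigest()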

* Cache serialized collections in file system

Serialization is very slow for big collections. This caches the result in a file.

* Add helper function for prefilters

The simplify_prefilters function converts XML filters to a simple tag and time range, which can be easily matched against the tag and time range that are extracted from vobject_items by the function find_tag_and_time_range.
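
How the two helpers fit together (a hypothetical glue function; the real
check lives in the multifilesystem backend's ``get_all_filtered``):

    def prefilter_match(filters, vobject_item):
        # Cheap necessary condition: same component tag and
        # overlapping time ranges (POSIX timestamps as int).
        tag, start, end, simple = simplify_prefilters(filters)
        if tag is None:
            return True, simple
        itag, istart, iend = find_tag_and_time_range(vobject_item)
        return tag == itag and istart < end and iend > start, simple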

* Add ability to cache etag and serialization of item

Parsing items with vobject is very slow and not required for many requests.
Caching can be used to speed it up.

* Cache metadata and serialization from items in file system

Store the serialized text and the tag and time range from vobject_items in the cache.
The metadata is used for prefilters.

* Remove the cache for the serialization of collections

* Serialize calendars without vobject

Merge the calendar components manually. This is much faster and requires less memory. Caching of the result is not required anymore.

* Allow pre_filtered_list to indicate that filters match

The storage backend can indicate that it evaluated the filters completely.

* Skip filtering with vobject if prefiltering is sufficient

``simplify_prefilters`` indicates if the simplified condition is identical to ``filters``.
This is used in the multifilesystem backend to detect if prefiltering is sufficient.
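
Schematically, the report handler now only falls back to the slow vobject
matching when the backend couldn't evaluate the filters completely
(``match`` and ``report_item`` stand in for the surrounding code):

    for item, filters_matched in collection.get_all_filtered(filters):
        if filters and not filters_matched:
            # prefiltering was only a necessary condition here;
            # run the full filter with vobject
            if not all(match(item, f[0]) for f in filters if f):
                continue
        report_item(item)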

* Make constants global

* Use generator expressions

* Only extract elements from inside of VCALENDAR

This is unnecessary at the moment; the text representation should never contain anything other than VCALENDAR.

* Improve comments

* Restore backward compatibility

* Small improvements for fastbackend
Authored by Unrud on 2017-06-02 14:14:55 +02:00, committed by GitHub
parent 78a62aee86, commit 9ceae0a751
2 changed files with 602 additions and 187 deletions

radicale/storage.py

@@ -27,7 +27,6 @@ entry.
 import binascii
 import contextlib
-import datetime
 import errno
 import json
 import os
@@ -36,6 +35,7 @@ import posixpath
 import shlex
 import stat
 import subprocess
+import sys
 import threading
 import time
 from contextlib import contextmanager
@@ -47,6 +47,10 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
 import vobject

+if sys.version_info >= (3, 5):
+    # HACK: Avoid import cycle for Python < 3.5
+    from . import xmlutils
+
 if os.name == "nt":
     import ctypes
     import ctypes.wintypes
@@ -89,6 +93,10 @@ elif os.name == "posix":

 def load(configuration, logger):
     """Load the storage manager chosen in configuration."""
+    if sys.version_info < (3, 5):
+        # HACK: Avoid import cycle for Python < 3.5
+        global xmlutils
+        from . import xmlutils
     storage_type = configuration.get("storage", "type")
     if storage_type == "multifilesystem":
         collection_class = Collection
@@ -107,6 +115,27 @@ def load(configuration, logger):
     return CollectionCopy


+def scandir(path, only_dirs=False, only_files=False):
+    """Iterator for directory elements. (For compatibility with Python < 3.5)
+
+    ``only_dirs`` only return directories
+
+    ``only_files`` only return files
+
+    """
+    if sys.version_info >= (3, 5):
+        for entry in os.scandir(path):
+            if ((not only_files or entry.is_file()) and
+                    (not only_dirs or entry.is_dir())):
+                yield entry.name
+    else:
+        for name in os.listdir(path):
+            p = os.path.join(path, name)
+            if ((not only_files or os.path.isfile(p)) and
+                    (not only_dirs or os.path.isdir(p))):
+                yield name
+
+
 def get_etag(text):
     """Etag from collection or item.
@@ -183,8 +212,8 @@ def path_to_filesystem(root, *paths):
         safe_path = os.path.join(safe_path, part)
         # Check for conflicting files (e.g. case-insensitive file systems
         # or short names on Windows file systems)
-        if os.path.lexists(safe_path):
-            if part not in os.listdir(safe_path_parent):
-                raise CollidingPathError(part)
+        if (os.path.lexists(safe_path) and
+                part not in scandir(safe_path_parent)):
+            raise CollidingPathError(part)
     return safe_path
@@ -214,19 +243,57 @@ class ComponentNotFoundError(ValueError):

 class Item:
-    def __init__(self, collection, item, href, last_modified=None):
+    def __init__(self, collection, item=None, href=None, last_modified=None,
+                 text=None, etag=None):
+        """Initialize an item.
+
+        ``collection`` the parent collection.
+
+        ``href`` the href of the item.
+
+        ``last_modified`` the HTTP-datetime of when the item was modified.
+
+        ``text`` the text representation of the item (optional if ``item``
+        is set).
+
+        ``item`` the vobject item (optional if ``text`` is set).
+
+        ``etag`` the etag of the item (optional). See ``get_etag``.
+
+        """
+        if text is None and item is None:
+            raise ValueError("at least one of 'text' or 'item' must be set")
         self.collection = collection
-        self.item = item
         self.href = href
         self.last_modified = last_modified
+        self._text = text
+        self._item = item
+        self._etag = etag

     def __getattr__(self, attr):
         return getattr(self.item, attr)

+    def serialize(self):
+        if self._text is None:
+            self._text = self.item.serialize()
+        return self._text
+
+    @property
+    def item(self):
+        if self._item is None:
+            try:
+                self._item = vobject.readOne(self._text)
+            except Exception as e:
+                raise RuntimeError("Failed to parse item %r in %r" %
+                                   (self.href, self.collection.path)) from e
+        return self._item
+
     @property
     def etag(self):
         """Encoded as quoted-string (see RFC 2616)."""
-        return get_etag(self.serialize())
+        if self._etag is None:
+            self._etag = get_etag(self.serialize())
+        return self._etag


 class BaseCollection:
@@ -331,21 +398,54 @@ class BaseCollection:
     def get_multi(self, hrefs):
         """Fetch multiple items. Duplicate hrefs must be ignored.

+        DEPRECATED: use ``get_multi2`` instead
+
+        """
+        return (self.get(href) for href in set(hrefs))
+
+    def get_multi2(self, hrefs):
+        """Fetch multiple items.
+
+        Functionally similar to ``get``, but might bring performance benefits
+        on some storages when used cleverly. It's not required to return the
+        requested items in the correct order. Duplicated hrefs can be ignored.
+
+        Returns tuples with the href and the item or None if the item doesn't
+        exist.
+
+        """
+        return ((href, self.get(href)) for href in hrefs)
+
+    def get_all(self):
+        """Fetch all items.
+
         Functionally similar to ``get``, but might bring performance benefits
         on some storages when used cleverly.

         """
-        for href in set(hrefs):
-            yield self.get(href)
+        return map(self.get, self.list())
+
+    def get_all_filtered(self, filters):
+        """Fetch all items with optional filtering.
+
+        This can largely improve performance of reports depending on
+        the filters and this implementation.
+
+        Returns tuples in the form ``(item, filters_matched)``.
+        ``filters_matched`` is a bool that indicates if ``filters`` are fully
+        matched.
+
+        This returns all events by default
+
+        """
+        return ((item, False) for item in self.get_all())

     def pre_filtered_list(self, filters):
         """List collection items with optional pre filtering.

-        This could largely improve performance of reports depending on
-        the filters and this implementation.
-
-        This returns all event by default
+        DEPRECATED: use ``get_all_filtered`` instead

         """
-        return [self.get(href) for href in self.list()]
+        return self.get_all()

     def has(self, href):
         """Check if an item exists by its href.
@@ -414,6 +514,8 @@ class Collection(BaseCollection):
         split_path = self.path.split("/")
         self.owner = split_path[0] if len(split_path) > 1 else None
         self.is_principal = principal
+        self._meta = None
+        self._etag = None

     @classmethod
     def _get_collection_root_folder(cls):
@@ -533,14 +635,12 @@ class Collection(BaseCollection):
                 for item in collection.list():
                     yield collection.get(item)
-        for href in os.listdir(filesystem_path):
+        for href in scandir(filesystem_path, only_dirs=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     cls.logger.debug("Skipping collection %r in %r", href,
                                      path)
                 continue
-            child_filesystem_path = path_to_filesystem(filesystem_path, href)
-            if os.path.isdir(child_filesystem_path):
-                child_path = posixpath.join(path, href)
-                child_principal = len(attributes) == 0
-                yield cls(child_path, child_principal)
+            child_path = posixpath.join(path, href)
+            child_principal = len(attributes) == 0
+            yield cls(child_path, child_principal)
@@ -724,7 +824,7 @@
         history_folder = os.path.join(self._filesystem_path,
                                       ".Radicale.cache", "history")
         try:
-            for href in os.listdir(history_folder):
+            for href in scandir(history_folder):
                 if not is_safe_filesystem_path_component(href):
                     continue
                 if os.path.isfile(os.path.join(self._filesystem_path, href)):
@@ -766,7 +866,7 @@
         token_name_hash = md5()
         # Find the history of all existing and deleted items
         for href, item in chain(
-                ((item.href, item) for item in self.pre_filtered_list(())),
+                ((item.href, item) for item in self.get_all()),
                 ((href, None) for href in self._get_deleted_history_hrefs())):
             history_etag = self._update_history_etag(href, item)
             state[href] = history_etag
@@ -835,43 +935,135 @@
         return token, changes

     def list(self):
-        for href in os.listdir(self._filesystem_path):
+        for href in scandir(self._filesystem_path, only_files=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     self.logger.debug(
                         "Skipping item %r in %r", href, self.path)
                 continue
-            path = os.path.join(self._filesystem_path, href)
-            if os.path.isfile(path):
-                yield href
+            yield href

-    def get(self, href):
-        if not href:
-            return None
-        if not is_safe_filesystem_path_component(href):
-            self.logger.debug("Can't translate name %r safely to filesystem "
-                              "in %r", href, self.path)
-            return None
-        path = path_to_filesystem(self._filesystem_path, href)
-        if not os.path.isfile(path):
-            return None
-        with open(path, encoding=self.encoding, newline="") as f:
-            text = f.read()
+    _item_cache_cleaned = False
+
+    def get(self, href, verify_href=True):
+        item, metadata = self._get_with_metadata(href, verify_href=verify_href)
+        return item
+
+    def _get_with_metadata(self, href, verify_href=True):
+        # Like ``get`` but additionally returns the following metadata:
+        # tag, start, end: see ``xmlutils.find_tag_and_time_range``
+        if verify_href:
+            try:
+                if not is_safe_filesystem_path_component(href):
+                    raise UnsafePathError(href)
+                path = path_to_filesystem(self._filesystem_path, href)
+            except ValueError as e:
+                self.logger.debug(
+                    "Can't translate name %r safely to filesystem in %r: %s",
+                    href, self.path, e, exc_info=True)
+                return None, None
+        else:
+            path = os.path.join(self._filesystem_path, href)
+        try:
+            with open(path, "rb") as f:
+                btext = f.read()
+        except (FileNotFoundError, IsADirectoryError):
+            return None, None
+        # The hash of the component in the file system. This is used to check
+        # if the entry in the cache is still valid.
+        input_hash = md5()
+        input_hash.update(btext)
+        input_hash = input_hash.hexdigest()
+        cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
+                                    "item")
+        try:
+            with open(os.path.join(cache_folder, href), "rb") as f:
+                cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
+        except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
+            if isinstance(e, (pickle.UnpicklingError, ValueError)):
+                self.logger.warning(
+                    "Failed to load item cache entry %r in %r: %s",
+                    href, self.path, e, exc_info=True)
+            cinput_hash = cetag = ctext = ctag = cstart = cend = None
+        vobject_item = None
+        if input_hash != cinput_hash:
+            vobject_item = Item(self, href=href,
+                                text=btext.decode(self.encoding)).item
+            # Serialize the object again, to normalize the text
+            # representation. The storage may have been edited externally.
+            ctext = vobject_item.serialize()
+            cetag = get_etag(ctext)
+            try:
+                ctag, cstart, cend = xmlutils.find_tag_and_time_range(
+                    vobject_item)
+            except Exception as e:
+                raise RuntimeError("Failed to find tag and time range of item "
+                                   "%r from %r: %s" % (href, self.path,
+                                                       e)) from e
+            self._makedirs_synced(cache_folder)
+            try:
+                # Race: Other processes might have created and locked the
+                # file.
+                with self._atomic_write(os.path.join(cache_folder, href),
+                                        "wb") as f:
+                    pickle.dump((input_hash, cetag, ctext,
+                                 ctag, cstart, cend), f)
+            except PermissionError:
+                pass
+            # Clean cache entries (max once per request). This happens once
+            # after new uploads, or if the data in the file system was
+            # edited externally.
+            if not self._item_cache_cleaned:
+                self._item_cache_cleaned = True
+                self._clean_cache(cache_folder, (
+                    href for href in scandir(cache_folder) if not
+                    os.path.isfile(os.path.join(self._filesystem_path,
+                                                href))))
         last_modified = time.strftime(
             "%a, %d %b %Y %H:%M:%S GMT",
             time.gmtime(os.path.getmtime(path)))
-        try:
-            item = vobject.readOne(text)
-        except Exception as e:
-            raise RuntimeError("Failed to parse item %r in %r" %
-                               (href, self.path)) from e
-        return Item(self, item, href, last_modified)
+        return Item(self, href=href, last_modified=last_modified, etag=cetag,
+                    text=ctext, item=vobject_item), (ctag, cstart, cend)
+
+    def get_multi2(self, hrefs):
+        # It's faster to check for file name collisions here, because
+        # we only need to call os.listdir once.
+        files = None
+        for href in hrefs:
+            if files is None:
+                # List dir after hrefs returned one item, the iterator may be
+                # empty and the for-loop is never executed.
+                files = os.listdir(self._filesystem_path)
+            path = os.path.join(self._filesystem_path, href)
+            if (not is_safe_filesystem_path_component(href) or
+                    href not in files and os.path.lexists(path)):
+                self.logger.debug(
+                    "Can't translate name safely to filesystem: %r", href)
+                yield (href, None)
+            else:
+                yield (href, self.get(href, verify_href=False))
+
+    def get_all(self):
+        # We don't need to check for collisions, because the file names
+        # are from os.listdir.
+        return (self.get(href, verify_href=False) for href in self.list())
+
+    def get_all_filtered(self, filters):
+        tag, start, end, simple = xmlutils.simplify_prefilters(filters)
+        if not tag:
+            # no filter
+            yield from ((item, simple) for item in self.get_all())
+            return
+        for item, (itag, istart, iend) in (
+                self._get_with_metadata(href, verify_href=False)
+                for href in self.list()):
+            if tag == itag and istart < end and iend > start:
+                yield item, simple and (start <= istart or iend <= end)

     def upload(self, href, vobject_item):
         if not is_safe_filesystem_path_component(href):
             raise UnsafePathError(href)
         path = path_to_filesystem(self._filesystem_path, href)
-        item = Item(self, vobject_item, href)
+        item = Item(self, href=href, item=vobject_item)
         with self._atomic_write(path, newline="") as fd:
             fd.write(item.serialize())
         # Track the change
@@ -907,57 +1099,101 @@
             self._clean_history_cache()

     def get_meta(self, key=None):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                try:
-                    meta = json.load(f)
-                except ValueError as e:
-                    raise RuntimeError("Failed to load properties of collect"
-                                       "ion %r: %s" % (self.path, e)) from e
-        return meta.get(key) if key else meta
+        # reuse cached value if the storage is read-only
+        if self._writer or self._meta is None:
+            try:
+                with open(self._props_path, encoding=self.encoding) as f:
+                    self._meta = json.load(f)
+            except FileNotFoundError:
+                self._meta = {}
+            except ValueError as e:
+                raise RuntimeError("Failed to load properties of collect"
+                                   "ion %r: %s" % (self.path, e)) from e
+        return self._meta.get(key) if key else self._meta

     def set_meta(self, props):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                old_props = json.load(f)
-                old_props.update(props)
-                props = old_props
-        props = {key: value for key, value in props.items() if value}
-        with self._atomic_write(self._props_path, "w+") as f:
-            json.dump(props, f)
+        new_props = self.get_meta()
+        new_props.update(props)
+        for key in tuple(new_props.keys()):
+            if not new_props[key]:
+                del new_props[key]
+        with self._atomic_write(self._props_path, "w") as f:
+            json.dump(new_props, f)

     @property
     def last_modified(self):
-        relevant_files = [self._filesystem_path] + [
-            path_to_filesystem(self._filesystem_path, href)
-            for href in self.list()]
-        if os.path.exists(self._props_path):
-            relevant_files.append(self._props_path)
+        relevant_files = chain(
+            (self._filesystem_path,),
+            (self._props_path,) if os.path.exists(self._props_path) else (),
+            (os.path.join(self._filesystem_path, h) for h in self.list()))
         last = max(map(os.path.getmtime, relevant_files))
         return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))

     def serialize(self):
-        items = []
-        time_begin = datetime.datetime.now()
-        for href in self.list():
-            items.append(self.get(href).item)
-        time_end = datetime.datetime.now()
-        self.logger.info(
-            "Read %d items in %.3f seconds from %r", len(items),
-            (time_end - time_begin).total_seconds(), self.path)
+        # serialize collection
         if self.get_meta("tag") == "VCALENDAR":
-            collection = vobject.iCalendar()
-            for item in items:
-                for content in ("vevent", "vtodo", "vjournal"):
-                    if content in item.contents:
-                        for item_part in getattr(item, "%s_list" % content):
-                            collection.add(item_part)
-                        break
-            return collection.serialize()
+            in_vcalendar = False
+            vtimezones = ""
+            included_tzids = set()
+            vtimezone = []
+            tzid = None
+            components = ""
+            # Concatenate all child elements of VCALENDAR from all items
+            # together, while preventing duplicated VTIMEZONE entries.
+            # VTIMEZONEs are only distinguished by their TZID; if different
+            # timezones share the same TZID this produces erroneous output.
+            # VObject fails at this too.
+            for item in self.get_all():
+                depth = 0
+                for line in item.serialize().split("\r\n"):
+                    if line.startswith("BEGIN:"):
+                        depth += 1
+                    if depth == 1 and line == "BEGIN:VCALENDAR":
+                        in_vcalendar = True
+                    elif in_vcalendar:
+                        if depth == 1 and line.startswith("END:"):
+                            in_vcalendar = False
+                        if depth == 2 and line == "BEGIN:VTIMEZONE":
+                            vtimezone.append(line)
+                        elif vtimezone:
+                            vtimezone.append(line)
+                            if depth == 2 and line.startswith("TZID:"):
+                                tzid = line[len("TZID:"):]
+                            elif depth == 2 and line.startswith("END:"):
+                                if tzid is None or tzid not in included_tzids:
+                                    if vtimezones:
+                                        vtimezones += "\r\n"
+                                    vtimezones += "\r\n".join(vtimezone)
+                                    included_tzids.add(tzid)
+                                vtimezone.clear()
+                                tzid = None
+                        elif depth >= 2:
+                            if components:
+                                components += "\r\n"
+                            components += line
+                    if line.startswith("END:"):
+                        depth -= 1
+            return "\r\n".join(filter(bool, (
+                "BEGIN:VCALENDAR",
+                "VERSION:2.0",
+                "PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
+                vtimezones,
+                components,
+                "END:VCALENDAR")))
         elif self.get_meta("tag") == "VADDRESSBOOK":
-            return "".join([item.serialize() for item in items])
+            return "".join((item.serialize() for item in self.get_all()))
         return ""

+    @property
+    def etag(self):
+        # reuse cached value if the storage is read-only
+        if self._writer or self._etag is None:
+            etag = md5()
+            for item in self.get_all():
+                etag.update((item.href + "/" + item.etag).encode("utf-8"))
+            self._etag = '"%s"' % etag.hexdigest()
+        return self._etag
+
     _lock = threading.Lock()
     _waiters = []
     _lock_file = None

radicale/xmlutils.py

@@ -26,12 +26,14 @@ in them for XML requests (all but PUT).
 """

 import copy
+import math
 import posixpath
 import re
 import xml.etree.ElementTree as ET
 from collections import OrderedDict
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
 from http import client
+from itertools import chain
 from urllib.parse import quote, unquote, urlparse

 from . import storage
@@ -56,6 +58,13 @@ for short, url in NAMESPACES.items():
 CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
 HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)

+DAY = timedelta(days=1)
+SECOND = timedelta(seconds=1)
+DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
+DATETIME_MAX = datetime.max.replace(tzinfo=timezone.utc)
+TIMESTAMP_MIN = math.floor(DATETIME_MIN.timestamp())
+TIMESTAMP_MAX = math.ceil(DATETIME_MAX.timestamp())
+

 def pretty_xml(element, level=0):
     """Indent an ElementTree ``element`` and its children."""
@@ -210,11 +219,9 @@ def _prop_match(item, filter_):

 def _time_range_match(vobject_item, filter_, child_name):
-    """Check whether the ``item`` matches the time-range ``filter_``.
-
-    See rfc4791-9.9.
-
-    """
+    """Check whether the component/property ``child_name`` of
+    ``vobject_item`` matches the time-range ``filter_``."""
     start = filter_.get("start")
     end = filter_.get("end")
     if not start and not end:
@@ -229,14 +236,53 @@ def _time_range_match(vobject_item, filter_, child_name):
         end = datetime.max
     start = start.replace(tzinfo=timezone.utc)
     end = end.replace(tzinfo=timezone.utc)
-    child = getattr(vobject_item, child_name.lower())
+    matched = False
+
+    def range_fn(range_start, range_end):
+        nonlocal matched
+        if start < range_end and range_start < end:
+            matched = True
+            return True
+        if end < range_start:
+            return True
+        return False
+
+    def infinity_fn(start):
+        return False
+
+    _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
+    return matched
+
+
+def _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn):
+    """Visit all time ranges in the component/property ``child_name`` of
+    ``vobject_item`` with the visitors ``range_fn`` and ``infinity_fn``.
+
+    ``range_fn`` gets called for every time range with ``start`` and ``end``
+    datetimes as arguments. If the function returns True, the operation is
+    cancelled.
+
+    ``infinity_fn`` gets called when an infinite recurrence rule is detected
+    with ``start`` datetime as argument. If the function returns True, the
+    operation is cancelled.
+
+    See rfc4791-9.9.
+
+    """
+    child = getattr(vobject_item, child_name.lower())
+
     # Comments give the lines in the tables of the specification
     if child_name == "VEVENT":
         # TODO: check if there's a timezone
         dtstart = child.dtstart.value

         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for dtstart in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(dtstart)):
+                        return
+                    break
             dtstarts = child.getrruleset(addRDate=True)
         else:
             dtstarts = (dtstart,)
@@ -255,31 +301,30 @@ def _time_range_match(vobject_item, filter_, child_name):
             dtstart_is_datetime = isinstance(dtstart, datetime)
             dtstart = _date_to_datetime(dtstart)
-            if dtstart > end:
-                break

             if dtend is not None:
                 # Line 1
                 dtend = dtstart + timedelta(seconds=original_duration)
-                if start < dtend and end > dtstart:
-                    return True
+                if range_fn(dtstart, dtend):
+                    return
             elif duration is not None:
                 if original_duration is None:
                     original_duration = duration.seconds
                 if duration.seconds > 0:
                     # Line 2
-                    if start < dtstart + duration and end > dtstart:
-                        return True
-                elif start <= dtstart and end > dtstart:
-                    # Line 3
-                    return True
+                    if range_fn(dtstart, dtstart + duration):
+                        return
+                else:
+                    # Line 3
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
             elif dtstart_is_datetime:
                 # Line 4
-                if start <= dtstart and end > dtstart:
-                    return True
-            elif start < dtstart + timedelta(days=1) and end > dtstart:
-                # Line 5
-                return True
+                if range_fn(dtstart, dtstart + SECOND):
+                    return
+            else:
+                # Line 5
+                if range_fn(dtstart, dtstart + DAY):
+                    return

     elif child_name == "VTODO":
         dtstart = getattr(child, "dtstart", None)
@@ -305,6 +350,12 @@ def _time_range_match(vobject_item, filter_, child_name):
             created = _date_to_datetime(created.value)

         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for reference_date in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(reference_date)):
+                        return
+                    break
             reference_dates = child.getrruleset(addRDate=True)
         else:
             if dtstart is not None:
@@ -317,47 +368,56 @@ def _time_range_match(vobject_item, filter_, child_name):
                 reference_dates = (created,)
             else:
                 # Line 8
-                return True
+                if range_fn(DATETIME_MIN, DATETIME_MAX):
+                    return
+                reference_dates = ()

         for reference_date in reference_dates:
             reference_date = _date_to_datetime(reference_date)
-            if reference_date > end:
-                break

             if dtstart is not None and duration is not None:
                 # Line 1
-                if start <= reference_date + duration and (
-                        end > reference_date or
-                        end >= reference_date + duration):
-                    return True
+                if range_fn(reference_date,
+                            reference_date + duration + SECOND):
+                    return
+                if range_fn(reference_date + duration - SECOND,
+                            reference_date + duration + SECOND):
+                    return
             elif dtstart is not None and due is not None:
                 # Line 2
                 due = reference_date + timedelta(seconds=original_duration)
-                if (start < due or start <= reference_date) and (
-                        end > reference_date or end >= due):
-                    return True
+                if (range_fn(reference_date, due) or
+                        range_fn(reference_date, reference_date + SECOND) or
+                        range_fn(due - SECOND, due) or
+                        range_fn(due - SECOND, reference_date + SECOND)):
+                    return
             elif dtstart is not None:
-                if start <= reference_date and end > reference_date:
-                    return True
+                if range_fn(reference_date, reference_date + SECOND):
+                    return
             elif due is not None:
                 # Line 4
-                if start < reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND, reference_date):
+                    return
             elif completed is not None and created is not None:
                 # Line 5
                 completed = reference_date + timedelta(
                     seconds=original_duration)
-                if (start <= reference_date or start <= completed) and (
-                        end >= reference_date or end >= completed):
-                    return True
+                if (range_fn(reference_date - SECOND,
+                             reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND) or
+                        range_fn(reference_date - SECOND,
+                                 reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND)):
+                    return
             elif completed is not None:
                 # Line 6
-                if start <= reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND,
+                            reference_date + SECOND):
+                    return
             elif created is not None:
                 # Line 7
-                if end > reference_date:
-                    return True
+                if range_fn(reference_date, DATETIME_MAX):
+                    return

     elif child_name == "VJOURNAL":
         dtstart = getattr(child, "dtstart", None)
@@ -365,6 +425,12 @@ def _time_range_match(vobject_item, filter_, child_name):
         if dtstart is not None:
             dtstart = dtstart.value

             if child.rruleset:
+                if (";UNTIL=" not in child.rrule.value and
+                        ";COUNT=" not in child.rrule.value):
+                    for dtstart in child.getrruleset(addRDate=True):
+                        if infinity_fn(_date_to_datetime(dtstart)):
+                            return
+                        break
                 dtstarts = child.getrruleset(addRDate=True)
             else:
                 dtstarts = (dtstart,)
@@ -373,18 +439,21 @@ def _time_range_match(vobject_item, filter_, child_name):
                 dtstart_is_datetime = isinstance(dtstart, datetime)
                 dtstart = _date_to_datetime(dtstart)
-                if dtstart > end:
-                    break

                 if dtstart_is_datetime:
                     # Line 1
-                    if start <= dtstart and end > dtstart:
-                        return True
-                elif start < dtstart + timedelta(days=1) and end > dtstart:
-                    # Line 2
-                    return True
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
+                else:
+                    # Line 2
+                    if range_fn(dtstart, dtstart + DAY):
+                        return

-    return False
+    elif isinstance(child, date):
+        if range_fn(child, child + DAY):
+            return
+    elif isinstance(child, datetime):
+        if range_fn(child, child + SECOND):
+            return


 def _text_match(vobject_item, filter_, child_name, attrib_name=None):
@@ -429,6 +498,99 @@ def _param_filter_match(vobject_item, filter_, parent_name):
     return condition


+def simplify_prefilters(filters):
+    """Creates a simplified condition from ``filters``.
+
+    Returns a tuple (``tag``, ``start``, ``end``, ``simple``) where ``tag`` is
+    a string or None (match all) and ``start`` and ``end`` are POSIX
+    timestamps (as int). ``simple`` is a bool that indicates that ``filters``
+    and the simplified condition are identical.
+
+    """
+    flat_filters = tuple(chain.from_iterable(filters))
+    simple = len(flat_filters) <= 1
+    for col_filter in flat_filters:
+        if (col_filter.tag != _tag("C", "comp-filter") or
+                col_filter.get("name") != "VCALENDAR"):
+            simple = False
+            continue
+        simple &= len(col_filter) <= 1
+        for comp_filter in col_filter:
+            if comp_filter.tag != _tag("C", "comp-filter"):
+                simple = False
+                continue
+            tag = comp_filter.get("name")
+            if (tag not in ("VTODO", "VEVENT", "VJOURNAL") or comp_filter.find(
+                    _tag("C", "is-not-defined")) is not None):
+                simple = False
+                continue
+            simple &= len(comp_filter) <= 1
+            for time_filter in comp_filter:
+                if time_filter.tag != _tag("C", "time-range"):
+                    simple = False
+                    continue
+                start = time_filter.get("start")
+                end = time_filter.get("end")
+                if start:
+                    start = math.floor(datetime.strptime(
+                        start, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    start = TIMESTAMP_MIN
+                if end:
+                    end = math.ceil(datetime.strptime(
+                        end, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    end = TIMESTAMP_MAX
+                return tag, start, end, simple
+            return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+    return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+
+
+def find_tag_and_time_range(vobject_item):
+    """Find tag and enclosing time range from ``vobject_item``.
+
+    Returns a tuple (``tag``, ``start``, ``end``) where ``tag`` is a string
+    and ``start`` and ``end`` are POSIX timestamps (as int).
+
+    This is intended to be used for matching against simplified prefilters.
+
+    """
+    tag = ""
+    if vobject_item.name == "VCALENDAR":
+        for component in vobject_item.components():
+            if component.name in ("VTODO", "VEVENT", "VJOURNAL"):
+                tag = component.name
+                break
+    if not tag:
+        return (None, math.floor(DATETIME_MIN.timestamp()),
+                math.ceil(DATETIME_MAX.timestamp()))
+    start = end = None
+
+    def range_fn(range_start, range_end):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        if end is None or end < range_end:
+            end = range_end
+        return False
+
+    def infinity_fn(range_start):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        end = DATETIME_MAX
+        return True
+
+    _visit_time_ranges(vobject_item, tag, range_fn, infinity_fn)
+    if start is None:
+        start = DATETIME_MIN
+    if end is None:
+        end = DATETIME_MAX
+    return tag, math.floor(start.timestamp()), math.ceil(end.timestamp())
+
+
 def name_from_path(path, collection):
     """Return Radicale item name from ``path``."""
     path = path.strip("/") + "/"
@@ -891,43 +1053,60 @@ def report(base_prefix, path, xml_request, collection):
             root.findall("./%s" % _tag("C", "filter")) +
             root.findall("./%s" % _tag("CR", "filter")))

-    for hreference in hreferences:
-        try:
-            name = name_from_path(hreference, collection)
-        except ValueError as e:
-            collection.logger.warning("Skipping invalid path %r in REPORT "
-                                      "request on %r: %s", hreference, path, e)
-            response = _item_response(base_prefix, hreference,
-                                      found_item=False)
-            multistatus.append(response)
-            continue
-        if name:
-            # Reference is an item
-            item = collection.get(name)
-            if not item:
-                response = _item_response(base_prefix, hreference,
-                                          found_item=False)
-                multistatus.append(response)
-                continue
-            items = [item]
-        else:
-            # Reference is a collection
-            items = collection.pre_filtered_list(filters)
-
-        for item in items:
-            if not item:
-                continue
-            if filters:
-                try:
-                    match = (_comp_match
-                             if collection.get_meta("tag") == "VCALENDAR"
-                             else _prop_match)
-                    if not all(match(item, filter_[0]) for filter_ in filters
-                               if filter_):
-                        continue
-                except Exception as e:
-                    raise RuntimeError("Failed to filter item %r from %r: %s" %
-                                       (collection.path, item.href, e)) from e
-
-            found_props = []
-            not_found_props = []
+    def retrieve_items(collection, hreferences, multistatus):
+        """Retrieves all items that are referenced in ``hreferences`` from
+        ``collection`` and adds 404 responses for missing and invalid items
+        to ``multistatus``."""
+        collection_requested = False
+
+        def get_names():
+            """Extracts all names from references in ``hreferences`` and adds
+            404 responses for invalid references to ``multistatus``.
+
+            If the whole collection is referenced, ``collection_requested``
+            gets set to ``True``."""
+            nonlocal collection_requested
+            for hreference in hreferences:
+                try:
+                    name = name_from_path(hreference, collection)
+                except ValueError as e:
+                    collection.logger.warning(
+                        "Skipping invalid path %r in REPORT request on %r: %s",
+                        hreference, path, e)
+                    response = _item_response(base_prefix, hreference,
+                                              found_item=False)
+                    multistatus.append(response)
+                    continue
+                if name:
+                    # Reference is an item
+                    yield name
+                else:
+                    # Reference is a collection
+                    collection_requested = True
+
+        for name, item in collection.get_multi2(get_names()):
+            if not item:
+                uri = "/" + posixpath.join(collection.path, name)
+                response = _item_response(base_prefix, uri,
+                                          found_item=False)
+                multistatus.append(response)
+            else:
+                yield item, False
+        if collection_requested:
+            yield from collection.get_all_filtered(filters)
+
+    for item, filters_matched in retrieve_items(collection, hreferences,
+                                                multistatus):
+        if filters and not filters_matched:
+            match = (
+                _comp_match if collection.get_meta("tag") == "VCALENDAR"
+                else _prop_match)
+            try:
+                if not all(match(item, filter_[0]) for filter_ in filters
+                           if filter_):
+                    continue
+            except Exception as e:
+                raise RuntimeError("Failed to filter item %r from %r: %s" %
+                                   (item.href, collection.path, e)) from e
+
+        found_props = []
+        not_found_props = []