Make Radicale fast (#569)

* Change get_multi to also return missing items

get_multi is not used anywhere and this makes it easier to use.

* Use get_multi for report requests

* Add get_all to BaseCollection

This can be used for optimization on multifilesystem.

* Use iterator for files

* Remove unnecessary checks

This never happens and would be an error.

* Don't raise exception when calling get with colliding name

This behavior is wrong, it should be handled as if the file doesn't exist.

* Use get_all and get_multi to skip unnecessary checks

Collision checks are slow on big collections.

* Use exception instead of existence checks

It's a bit faster.

* Use os.scandir instead of os.listdir

It's faster and doesn't load all files at once.

* Cache metadata when storage is read-only

Metadata is queried a lot during a request. It's quite slow to load and parse the file every time.

* Cache the etag when the storage is read-only

The etag is calculated twice for GET requests on collections.

* Add helper method for cleaning caches

* Use item etags to calculate collection etag

It's very slow and unnecessary to parse all files with VObject and serialize them again.

* Cache serialized collections in file system

Serialization is very slow for big collections. This caches the result in a file.

* Add helper function for prefilters

The simplify_prefilters function converts XML filters to a simple tag and time range, which can be easily matched against the tag and time range that are extracted from vobject_items by the function find_tag_and_time_range.

* Add ability to cache etag and serialization of item

Parsing items with vobject is very slow and not required for many requests.
Caching can be used to speed it up.

* Cache metadata and serialization from items in file system

Store the serialized text and the tag and time range from vobject_items in the cache.
The metadata is used for prefilters.

* Remove the cache for the serialization of collections

* Serialize calendars without vobject

Merge the calendar components manually. This is much faster and requires less memory. Caching of the result is not required anymore.

* Allow pre_filtered_list to indicate that filters match

The storage backend can indicate that it evaluated the filters completely.

* Skip filtering with vobject if prefiltering is sufficient

``simplify_prefilters`` indicates if the simplified condition is identical to ``filters``.
This is used in the multifilesystem backend to detect if prefiltering is sufficient.

* Make constants global

* Use generator expressions

* Only extract elements from inside of VCALENDAR

This is unnecessary at the moment, the text representation should never contain anything but VCALENDAR.

* Improve comments

* restore backward compatibility

* Small improvements for fastbackend
This commit is contained in:
Unrud 2017-06-02 14:14:55 +02:00 committed by GitHub
parent 78a62aee86
commit 9ceae0a751
2 changed files with 602 additions and 187 deletions

View File

@ -27,7 +27,6 @@ entry.
import binascii
import contextlib
import datetime
import errno
import json
import os
@ -36,6 +35,7 @@ import posixpath
import shlex
import stat
import subprocess
import sys
import threading
import time
from contextlib import contextmanager
@ -47,6 +47,10 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
import vobject
if sys.version_info >= (3, 5):
# HACK: Avoid import cycle for Python < 3.5
from . import xmlutils
if os.name == "nt":
import ctypes
import ctypes.wintypes
@ -89,6 +93,10 @@ elif os.name == "posix":
def load(configuration, logger):
"""Load the storage manager chosen in configuration."""
if sys.version_info < (3, 5):
# HACK: Avoid import cycle for Python < 3.5
global xmlutils
from . import xmlutils
storage_type = configuration.get("storage", "type")
if storage_type == "multifilesystem":
collection_class = Collection
@ -107,6 +115,27 @@ def load(configuration, logger):
return CollectionCopy
def scandir(path, only_dirs=False, only_files=False):
    """Iterator for directory elements. (For compatibility with Python < 3.5)

    Yields the names (not the full paths) of the entries in ``path``.

    ``only_dirs`` only return directories

    ``only_files`` only return files

    If both flags are set nothing is yielded, because no entry is a file
    and a directory at the same time.

    """
    if sys.version_info >= (3, 5):
        entries = os.scandir(path)
        try:
            for entry in entries:
                if ((not only_files or entry.is_file()) and
                        (not only_dirs or entry.is_dir())):
                    yield entry.name
        finally:
            # Callers may stop iterating early (e.g. membership tests), so
            # release the directory handle explicitly instead of waiting
            # for garbage collection. ``close`` only exists since
            # Python 3.6, hence the lookup.
            close = getattr(entries, "close", None)
            if close is not None:
                close()
    else:
        for name in os.listdir(path):
            p = os.path.join(path, name)
            if ((not only_files or os.path.isfile(p)) and
                    (not only_dirs or os.path.isdir(p))):
                yield name
def get_etag(text):
"""Etag from collection or item.
@ -183,9 +212,9 @@ def path_to_filesystem(root, *paths):
safe_path = os.path.join(safe_path, part)
# Check for conflicting files (e.g. case-insensitive file systems
# or short names on Windows file systems)
if os.path.lexists(safe_path):
if part not in os.listdir(safe_path_parent):
raise CollidingPathError(part)
if (os.path.lexists(safe_path) and
part not in scandir(safe_path_parent)):
raise CollidingPathError(part)
return safe_path
@ -214,19 +243,57 @@ class ComponentNotFoundError(ValueError):
class Item:
def __init__(self, collection, item, href, last_modified=None):
def __init__(self, collection, item=None, href=None, last_modified=None,
text=None, etag=None):
"""Initialize an item.
``collection`` the parent collection.
``href`` the href of the item.
``last_modified`` the HTTP-datetime of when the item was modified.
``text`` the text representation of the item (optional if ``item`` is
set).
``item`` the vobject item (optional if ``text`` is set).
``etag`` the etag of the item (optional). See ``get_etag``.
"""
if text is None and item is None:
raise ValueError("at least one of 'text' or 'item' must be set")
self.collection = collection
self.item = item
self.href = href
self.last_modified = last_modified
self._text = text
self._item = item
self._etag = etag
def __getattr__(self, attr):
return getattr(self.item, attr)
def serialize(self):
if self._text is None:
self._text = self.item.serialize()
return self._text
@property
def item(self):
if self._item is None:
try:
self._item = vobject.readOne(self._text)
except Exception as e:
raise RuntimeError("Failed to parse item %r in %r" %
(self.href, self.collection.path)) from e
return self._item
@property
def etag(self):
"""Encoded as quoted-string (see RFC 2616)."""
return get_etag(self.serialize())
if self._etag is None:
self._etag = get_etag(self.serialize())
return self._etag
class BaseCollection:
@ -331,21 +398,54 @@ class BaseCollection:
def get_multi(self, hrefs):
    """Fetch the items for ``hrefs``, visiting every distinct href once.

    The result is a lazy iterable with one ``self.get(href)`` value per
    distinct href; the order is unspecified.

    DEPRECATED: use ``get_multi2`` instead

    """
    unique_hrefs = set(hrefs)
    return (self.get(name) for name in unique_hrefs)
def get_multi2(self, hrefs):
    """Fetch multiple items and report each one together with its href.

    Functionally similar to ``get``, but storages may override this to
    fetch several items more efficiently. Implementations are not
    required to keep the order of ``hrefs`` and may ignore duplicated
    hrefs.

    Returns a lazy iterable of ``(href, item)`` tuples, where ``item`` is
    whatever ``self.get(href)`` returns (None if the item doesn't exist).

    """
    return ((name, self.get(name)) for name in hrefs)
def get_all(self):
"""Fetch all items.
Functionally similar to ``get``, but might bring performance benefits
on some storages when used cleverly.
"""
for href in set(hrefs):
yield self.get(href)
return map(self.get, self.list())
def get_all_filtered(self, filters):
    """Fetch all items with optional filtering.

    Storages can override this to evaluate ``filters`` (partially) on
    their own, which can largely improve the performance of reports.

    Returns tuples in the form ``(item, filters_matched)``.
    ``filters_matched`` is a bool that indicates if ``filters`` are fully
    matched.

    This default implementation ignores ``filters``: it returns all items
    and always reports ``filters_matched`` as False.

    """
    return ((entry, False) for entry in self.get_all())
def pre_filtered_list(self, filters):
"""List collection items with optional pre filtering.
This could largely improve performance of reports depending on
the filters and this implementation.
This returns all event by default
DEPRECATED: use ``get_all_filtered`` instead
"""
return [self.get(href) for href in self.list()]
return self.get_all()
def has(self, href):
"""Check if an item exists by its href.
@ -414,6 +514,8 @@ class Collection(BaseCollection):
split_path = self.path.split("/")
self.owner = split_path[0] if len(split_path) > 1 else None
self.is_principal = principal
self._meta = None
self._etag = None
@classmethod
def _get_collection_root_folder(cls):
@ -533,17 +635,15 @@ class Collection(BaseCollection):
for item in collection.list():
yield collection.get(item)
for href in os.listdir(filesystem_path):
for href in scandir(filesystem_path, only_dirs=True):
if not is_safe_filesystem_path_component(href):
if not href.startswith(".Radicale"):
cls.logger.debug("Skipping collection %r in %r", href,
path)
continue
child_filesystem_path = path_to_filesystem(filesystem_path, href)
if os.path.isdir(child_filesystem_path):
child_path = posixpath.join(path, href)
child_principal = len(attributes) == 0
yield cls(child_path, child_principal)
child_path = posixpath.join(path, href)
child_principal = len(attributes) == 0
yield cls(child_path, child_principal)
@classmethod
def create_collection(cls, href, collection=None, props=None):
@ -724,7 +824,7 @@ class Collection(BaseCollection):
history_folder = os.path.join(self._filesystem_path,
".Radicale.cache", "history")
try:
for href in os.listdir(history_folder):
for href in scandir(history_folder):
if not is_safe_filesystem_path_component(href):
continue
if os.path.isfile(os.path.join(self._filesystem_path, href)):
@ -766,7 +866,7 @@ class Collection(BaseCollection):
token_name_hash = md5()
# Find the history of all existing and deleted items
for href, item in chain(
((item.href, item) for item in self.pre_filtered_list(())),
((item.href, item) for item in self.get_all()),
((href, None) for href in self._get_deleted_history_hrefs())):
history_etag = self._update_history_etag(href, item)
state[href] = history_etag
@ -835,43 +935,135 @@ class Collection(BaseCollection):
return token, changes
def list(self):
for href in os.listdir(self._filesystem_path):
for href in scandir(self._filesystem_path, only_files=True):
if not is_safe_filesystem_path_component(href):
if not href.startswith(".Radicale"):
self.logger.debug(
"Skipping item %r in %r", href, self.path)
continue
path = os.path.join(self._filesystem_path, href)
if os.path.isfile(path):
yield href
yield href
def get(self, href):
if not href:
return None
if not is_safe_filesystem_path_component(href):
self.logger.debug("Can't translate name %r safely to filesystem "
"in %r", href, self.path)
return None
path = path_to_filesystem(self._filesystem_path, href)
if not os.path.isfile(path):
return None
with open(path, encoding=self.encoding, newline="") as f:
text = f.read()
_item_cache_cleaned = False
def get(self, href, verify_href=True):
item, metadata = self._get_with_metadata(href, verify_href=verify_href)
return item
def _get_with_metadata(self, href, verify_href=True):
# Like ``get`` but additionally returns the following metadata:
# tag, start, end: see ``xmlutils.find_tag_and_time_range``
if verify_href:
try:
if not is_safe_filesystem_path_component(href):
raise UnsafePathError(href)
path = path_to_filesystem(self._filesystem_path, href)
except ValueError as e:
self.logger.debug(
"Can't translate name %r safely to filesystem in %r: %s",
href, self.path, e, exc_info=True)
return None, None
else:
path = os.path.join(self._filesystem_path, href)
try:
with open(path, "rb") as f:
btext = f.read()
except (FileNotFoundError, IsADirectoryError):
return None, None
# The hash of the component in the file system. This is used to check,
# if the entry in the cache is still valid.
input_hash = md5()
input_hash.update(btext)
input_hash = input_hash.hexdigest()
cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
"item")
try:
with open(os.path.join(cache_folder, href), "rb") as f:
cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
if isinstance(e, (pickle.UnpicklingError, ValueError)):
self.logger.warning(
"Failed to load item cache entry %r in %r: %s",
href, self.path, e, exc_info=True)
cinput_hash = cetag = ctext = ctag = cstart = cend = None
vobject_item = None
if input_hash != cinput_hash:
vobject_item = Item(self, href=href,
text=btext.decode(self.encoding)).item
# Serialize the object again, to normalize the text representation.
# The storage may have been edited externally.
ctext = vobject_item.serialize()
cetag = get_etag(ctext)
try:
ctag, cstart, cend = xmlutils.find_tag_and_time_range(
vobject_item)
except Exception as e:
raise RuntimeError("Failed to find tag and time range of item "
"%r from %r: %s" % (href, self.path,
e)) from e
self._makedirs_synced(cache_folder)
try:
# Race: Other processes might have created and locked the
# file.
with self._atomic_write(os.path.join(cache_folder, href),
"wb") as f:
pickle.dump((input_hash, cetag, ctext,
ctag, cstart, cend), f)
except PermissionError:
pass
# Clean cache entries (max once per request)
# This happens once after new uploads, or if the data in the
# file system was edited externally.
if not self._item_cache_cleaned:
self._item_cache_cleaned = True
self._clean_cache(cache_folder, (
href for href in scandir(cache_folder) if not
os.path.isfile(os.path.join(self._filesystem_path, href))))
last_modified = time.strftime(
"%a, %d %b %Y %H:%M:%S GMT",
time.gmtime(os.path.getmtime(path)))
try:
item = vobject.readOne(text)
except Exception as e:
raise RuntimeError("Failed to parse item %r in %r" %
(href, self.path)) from e
return Item(self, item, href, last_modified)
return Item(self, href=href, last_modified=last_modified, etag=cetag,
text=ctext, item=vobject_item), (ctag, cstart, cend)
def get_multi2(self, hrefs):
    """Fetch multiple items, checking name collisions in one pass.

    Yields ``(href, item)`` tuples in the order of ``hrefs``. ``item`` is
    None if ``href`` is not a safe file system path component or if it
    collides with a differently spelled existing file; otherwise it is
    the result of ``self.get(href, verify_href=False)``.

    """
    # It's faster to check for file name collisions here, because
    # we only need to call os.listdir once.
    files = None
    for href in hrefs:
        if files is None:
            # List dir after hrefs returned one item, the iterator may be
            # empty and the for-loop is never executed.
            # A set keeps every membership test below O(1).
            files = frozenset(os.listdir(self._filesystem_path))
        path = os.path.join(self._filesystem_path, href)
        # A name that is missing from the listing but still resolves to
        # an existing path collides with another file (e.g. on
        # case-insensitive file systems).
        if (not is_safe_filesystem_path_component(href) or
                (href not in files and os.path.lexists(path))):
            self.logger.debug(
                "Can't translate name safely to filesystem: %r", href)
            yield (href, None)
        else:
            yield (href, self.get(href, verify_href=False))
def get_all(self):
    """Fetch all items of the collection lazily.

    Returns an iterable with ``self.get(href, verify_href=False)`` for
    every href from ``self.list()``.

    """
    # We don't need to check for collisions, because the file names
    # come straight from the directory listing.
    return (self.get(name, verify_href=False) for name in self.list())
def get_all_filtered(self, filters):
    """Fetch all items, pre-filtered by tag and time range.

    ``filters`` is reduced to ``(tag, start, end, simple)`` with
    ``xmlutils.simplify_prefilters``. Yields ``(item, filters_matched)``
    tuples.

    Without a tag every item is yielded and ``filters_matched`` is
    ``simple``. Otherwise only items with the same tag whose time range
    overlaps ``start``..``end`` are yielded; ``filters_matched`` is then
    only True if ``simple`` is set and the item's range starts at or after
    ``start`` or ends at or before ``end``.

    """
    tag, start, end, simple = xmlutils.simplify_prefilters(filters)
    if not tag:
        # no filter
        for item in self.get_all():
            yield item, simple
        return
    for href in self.list():
        item, (itag, istart, iend) = self._get_with_metadata(
            href, verify_href=False)
        if tag != itag or not (istart < end and iend > start):
            # Different component type or no overlap with the time range.
            continue
        yield item, simple and (start <= istart or iend <= end)
def upload(self, href, vobject_item):
if not is_safe_filesystem_path_component(href):
raise UnsafePathError(href)
path = path_to_filesystem(self._filesystem_path, href)
item = Item(self, vobject_item, href)
item = Item(self, href=href, item=vobject_item)
with self._atomic_write(path, newline="") as fd:
fd.write(item.serialize())
# Track the change
@ -907,57 +1099,101 @@ class Collection(BaseCollection):
self._clean_history_cache()
def get_meta(self, key=None):
if os.path.exists(self._props_path):
with open(self._props_path, encoding=self.encoding) as f:
try:
meta = json.load(f)
except ValueError as e:
raise RuntimeError("Failed to load properties of collect"
"ion %r: %s" % (self.path, e)) from e
return meta.get(key) if key else meta
# reuse cached value if the storage is read-only
if self._writer or self._meta is None:
try:
with open(self._props_path, encoding=self.encoding) as f:
self._meta = json.load(f)
except FileNotFoundError:
self._meta = {}
except ValueError as e:
raise RuntimeError("Failed to load properties of collect"
"ion %r: %s" % (self.path, e)) from e
return self._meta.get(key) if key else self._meta
def set_meta(self, props):
if os.path.exists(self._props_path):
with open(self._props_path, encoding=self.encoding) as f:
old_props = json.load(f)
old_props.update(props)
props = old_props
props = {key: value for key, value in props.items() if value}
with self._atomic_write(self._props_path, "w+") as f:
json.dump(props, f)
new_props = self.get_meta()
new_props.update(props)
for key in tuple(new_props.keys()):
if not new_props[key]:
del new_props[key]
with self._atomic_write(self._props_path, "w") as f:
json.dump(new_props, f)
@property
def last_modified(self):
relevant_files = [self._filesystem_path] + [
path_to_filesystem(self._filesystem_path, href)
for href in self.list()]
if os.path.exists(self._props_path):
relevant_files.append(self._props_path)
relevant_files = chain(
(self._filesystem_path,),
(self._props_path,) if os.path.exists(self._props_path) else (),
(os.path.join(self._filesystem_path, h) for h in self.list()))
last = max(map(os.path.getmtime, relevant_files))
return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))
def serialize(self):
items = []
time_begin = datetime.datetime.now()
for href in self.list():
items.append(self.get(href).item)
time_end = datetime.datetime.now()
self.logger.info(
"Read %d items in %.3f seconds from %r", len(items),
(time_end - time_begin).total_seconds(), self.path)
# serialize collection
if self.get_meta("tag") == "VCALENDAR":
collection = vobject.iCalendar()
for item in items:
for content in ("vevent", "vtodo", "vjournal"):
if content in item.contents:
for item_part in getattr(item, "%s_list" % content):
collection.add(item_part)
break
return collection.serialize()
in_vcalendar = False
vtimezones = ""
included_tzids = set()
vtimezone = []
tzid = None
components = ""
# Concatenate all child elements of VCALENDAR from all items
# together, while preventing duplicated VTIMEZONE entries.
# VTIMEZONEs are only distinguished by their TZID, if different
# timezones share the same TZID this produces erroneous output.
# VObject fails at this too.
for item in self.get_all():
depth = 0
for line in item.serialize().split("\r\n"):
if line.startswith("BEGIN:"):
depth += 1
if depth == 1 and line == "BEGIN:VCALENDAR":
in_vcalendar = True
elif in_vcalendar:
if depth == 1 and line.startswith("END:"):
in_vcalendar = False
if depth == 2 and line == "BEGIN:VTIMEZONE":
vtimezone.append(line)
elif vtimezone:
vtimezone.append(line)
if depth == 2 and line.startswith("TZID:"):
tzid = line[len("TZID:"):]
elif depth == 2 and line.startswith("END:"):
if tzid is None or tzid not in included_tzids:
if vtimezones:
vtimezones += "\r\n"
vtimezones += "\r\n".join(vtimezone)
included_tzids.add(tzid)
vtimezone.clear()
tzid = None
elif depth >= 2:
if components:
components += "\r\n"
components += line
if line.startswith("END:"):
depth -= 1
return "\r\n".join(filter(bool, (
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
vtimezones,
components,
"END:VCALENDAR")))
elif self.get_meta("tag") == "VADDRESSBOOK":
return "".join([item.serialize() for item in items])
return "".join((item.serialize() for item in self.get_all()))
return ""
@property
def etag(self):
    """Etag of the collection, derived from the hrefs and etags of its items.

    Encoded as a quoted hex digest. The value is recomputed on every
    access while ``self._writer`` is set; otherwise the value from the
    first computation is reused.

    """
    # reuse cached value if the storage is read-only
    if self._etag is not None and not self._writer:
        return self._etag
    digest = md5()
    for entry in self.get_all():
        digest.update("/".join((entry.href, entry.etag)).encode("utf-8"))
    self._etag = '"%s"' % digest.hexdigest()
    return self._etag
_lock = threading.Lock()
_waiters = []
_lock_file = None

View File

@ -26,12 +26,14 @@ in them for XML requests (all but PUT).
"""
import copy
import math
import posixpath
import re
import xml.etree.ElementTree as ET
from collections import OrderedDict
from datetime import datetime, timedelta, timezone
from datetime import date, datetime, timedelta, timezone
from http import client
from itertools import chain
from urllib.parse import quote, unquote, urlparse
from . import storage
@ -56,6 +58,13 @@ for short, url in NAMESPACES.items():
CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)
DAY = timedelta(days=1)
SECOND = timedelta(seconds=1)
DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
DATETIME_MAX = datetime.max.replace(tzinfo=timezone.utc)
TIMESTAMP_MIN = math.floor(DATETIME_MIN.timestamp())
TIMESTAMP_MAX = math.ceil(DATETIME_MAX.timestamp())
def pretty_xml(element, level=0):
"""Indent an ElementTree ``element`` and its children."""
@ -210,11 +219,9 @@ def _prop_match(item, filter_):
def _time_range_match(vobject_item, filter_, child_name):
"""Check whether the ``item`` matches the time-range ``filter_``.
"""Check whether the component/property ``child_name`` of
``vobject_item`` matches the time-range ``filter_``."""
See rfc4791-9.9.
"""
start = filter_.get("start")
end = filter_.get("end")
if not start and not end:
@ -229,14 +236,53 @@ def _time_range_match(vobject_item, filter_, child_name):
end = datetime.max
start = start.replace(tzinfo=timezone.utc)
end = end.replace(tzinfo=timezone.utc)
child = getattr(vobject_item, child_name.lower())
matched = False
def range_fn(range_start, range_end):
nonlocal matched
if start < range_end and range_start < end:
matched = True
return True
if end < range_start:
return True
return False
def infinity_fn(start):
return False
_visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
return matched
def _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn):
"""Visit all time ranges in the component/property ``child_name`` of
``vobject_item`` with visitors ``range_fn`` and ``infinity_fn``.
``range_fn`` gets called for every time_range with ``start`` and ``end``
datetimes as arguments. If the function returns True, the operation is
cancelled.
``infinity_fn`` gets called when an infinite recurrence rule is detected
with ``start`` datetime as argument. If the function returns True, the
operation is cancelled.
See rfc4791-9.9.
"""
child = getattr(vobject_item, child_name.lower())
# Comments give the lines in the tables of the specification
if child_name == "VEVENT":
# TODO: check if there's a timezone
dtstart = child.dtstart.value
if child.rruleset:
if (";UNTIL=" not in child.rrule.value and
";COUNT=" not in child.rrule.value):
for dtstart in child.getrruleset(addRDate=True):
if infinity_fn(_date_to_datetime(dtstart)):
return
break
dtstarts = child.getrruleset(addRDate=True)
else:
dtstarts = (dtstart,)
@ -255,31 +301,30 @@ def _time_range_match(vobject_item, filter_, child_name):
dtstart_is_datetime = isinstance(dtstart, datetime)
dtstart = _date_to_datetime(dtstart)
if dtstart > end:
break
if dtend is not None:
# Line 1
dtend = dtstart + timedelta(seconds=original_duration)
if start < dtend and end > dtstart:
return True
if range_fn(dtstart, dtend):
return
elif duration is not None:
if original_duration is None:
original_duration = duration.seconds
if duration.seconds > 0:
# Line 2
if start < dtstart + duration and end > dtstart:
return True
elif start <= dtstart and end > dtstart:
if range_fn(dtstart, dtstart + duration):
return
else:
# Line 3
return True
if range_fn(dtstart, dtstart + SECOND):
return
elif dtstart_is_datetime:
# Line 4
if start <= dtstart and end > dtstart:
return True
elif start < dtstart + timedelta(days=1) and end > dtstart:
if range_fn(dtstart, dtstart + SECOND):
return
else:
# Line 5
return True
if range_fn(dtstart, dtstart + DAY):
return
elif child_name == "VTODO":
dtstart = getattr(child, "dtstart", None)
@ -305,6 +350,12 @@ def _time_range_match(vobject_item, filter_, child_name):
created = _date_to_datetime(created.value)
if child.rruleset:
if (";UNTIL=" not in child.rrule.value and
";COUNT=" not in child.rrule.value):
for reference_date in child.getrruleset(addRDate=True):
if infinity_fn(_date_to_datetime(reference_date)):
return
break
reference_dates = child.getrruleset(addRDate=True)
else:
if dtstart is not None:
@ -317,47 +368,56 @@ def _time_range_match(vobject_item, filter_, child_name):
reference_dates = (created,)
else:
# Line 8
return True
if range_fn(DATETIME_MIN, DATETIME_MAX):
return
reference_dates = ()
for reference_date in reference_dates:
reference_date = _date_to_datetime(reference_date)
if reference_date > end:
break
if dtstart is not None and duration is not None:
# Line 1
if start <= reference_date + duration and (
end > reference_date or
end >= reference_date + duration):
return True
if range_fn(reference_date,
reference_date + duration + SECOND):
return
if range_fn(reference_date + duration - SECOND,
reference_date + duration + SECOND):
return
elif dtstart is not None and due is not None:
# Line 2
due = reference_date + timedelta(seconds=original_duration)
if (start < due or start <= reference_date) and (
end > reference_date or end >= due):
return True
if (range_fn(reference_date, due) or
range_fn(reference_date, reference_date + SECOND) or
range_fn(due - SECOND, due) or
range_fn(due - SECOND, reference_date + SECOND)):
return
elif dtstart is not None:
if start <= reference_date and end > reference_date:
return True
if range_fn(reference_date, reference_date + SECOND):
return
elif due is not None:
# Line 4
if start < reference_date and end >= reference_date:
return True
if range_fn(reference_date - SECOND, reference_date):
return
elif completed is not None and created is not None:
# Line 5
completed = reference_date + timedelta(
seconds=original_duration)
if (start <= reference_date or start <= completed) and (
end >= reference_date or end >= completed):
return True
if (range_fn(reference_date - SECOND,
reference_date + SECOND) or
range_fn(completed - SECOND, completed + SECOND) or
range_fn(reference_date - SECOND,
reference_date + SECOND) or
range_fn(completed - SECOND, completed + SECOND)):
return
elif completed is not None:
# Line 6
if start <= reference_date and end >= reference_date:
return True
if range_fn(reference_date - SECOND,
reference_date + SECOND):
return
elif created is not None:
# Line 7
if end > reference_date:
return True
if range_fn(reference_date, DATETIME_MAX):
return
elif child_name == "VJOURNAL":
dtstart = getattr(child, "dtstart", None)
@ -365,6 +425,12 @@ def _time_range_match(vobject_item, filter_, child_name):
if dtstart is not None:
dtstart = dtstart.value
if child.rruleset:
if (";UNTIL=" not in child.rrule.value and
";COUNT=" not in child.rrule.value):
for dtstart in child.getrruleset(addRDate=True):
if infinity_fn(_date_to_datetime(dtstart)):
return
break
dtstarts = child.getrruleset(addRDate=True)
else:
dtstarts = (dtstart,)
@ -373,18 +439,21 @@ def _time_range_match(vobject_item, filter_, child_name):
dtstart_is_datetime = isinstance(dtstart, datetime)
dtstart = _date_to_datetime(dtstart)
if dtstart > end:
break
if dtstart_is_datetime:
# Line 1
if start <= dtstart and end > dtstart:
return True
elif start < dtstart + timedelta(days=1) and end > dtstart:
if range_fn(dtstart, dtstart + SECOND):
return
else:
# Line 2
return True
if range_fn(dtstart, dtstart + DAY):
return
return False
elif isinstance(child, date):
if range_fn(child, child + DAY):
return
elif isinstance(child, datetime):
if range_fn(child, child + SECOND):
return
def _text_match(vobject_item, filter_, child_name, attrib_name=None):
@ -429,6 +498,99 @@ def _param_filter_match(vobject_item, filter_, parent_name):
return condition
def simplify_prefilters(filters):
    """Creates a simplified condition from ``filters``.

    Returns a tuple (``tag``, ``start``, ``end``, ``simple``) where ``tag`` is
    a string or None (match all) and ``start`` and ``end`` are POSIX
    timestamps (as int). ``simple`` is a bool that indicates that ``filters``
    and the simplified condition are identical.

    Only the first usable VTODO/VEVENT/VJOURNAL comp-filter inside a
    VCALENDAR comp-filter is used. Everything that is skipped or ignored
    clears ``simple``.

    """
    def to_timestamp(value, rounding, fallback):
        # Convert a UTC time-range bound to an int timestamp, or use
        # ``fallback`` if the bound is missing.
        if not value:
            return fallback
        parsed = datetime.strptime(value, "%Y%m%dT%H%M%SZ")
        return rounding(parsed.replace(tzinfo=timezone.utc).timestamp())

    flat_filters = tuple(chain.from_iterable(filters))
    simple = len(flat_filters) <= 1
    for col_filter in flat_filters:
        is_calendar_filter = (
            col_filter.tag == _tag("C", "comp-filter") and
            col_filter.get("name") == "VCALENDAR")
        if not is_calendar_filter:
            simple = False
            continue
        simple &= len(col_filter) <= 1
        for comp_filter in col_filter:
            if comp_filter.tag != _tag("C", "comp-filter"):
                simple = False
                continue
            tag = comp_filter.get("name")
            unsupported = (
                tag not in ("VTODO", "VEVENT", "VJOURNAL") or
                comp_filter.find(_tag("C", "is-not-defined")) is not None)
            if unsupported:
                simple = False
                continue
            simple &= len(comp_filter) <= 1
            for time_filter in comp_filter:
                if time_filter.tag != _tag("C", "time-range"):
                    simple = False
                    continue
                # Round outwards so the simplified range never gets
                # smaller than the requested one.
                start = to_timestamp(
                    time_filter.get("start"), math.floor, TIMESTAMP_MIN)
                end = to_timestamp(
                    time_filter.get("end"), math.ceil, TIMESTAMP_MAX)
                return tag, start, end, simple
            # Component filter without a usable time-range.
            return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
    return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
def find_tag_and_time_range(vobject_item):
    """Find tag and enclosing time range from ``vobject item``.

    Returns a tuple (``tag``, ``start``, ``end``) where ``tag`` is a string
    and ``start`` and ``end`` are POSIX timestamps (as int).

    ``tag`` is the name of the first VTODO, VEVENT or VJOURNAL component of
    a VCALENDAR. If there is none, ``tag`` is None and the widest possible
    range is returned.

    This is intended to be used for matching against simplified prefilters.

    """
    tag = ""
    if vobject_item.name == "VCALENDAR":
        tag = next(
            (component.name for component in vobject_item.components()
             if component.name in ("VTODO", "VEVENT", "VJOURNAL")), "")
    if not tag:
        return (None, math.floor(DATETIME_MIN.timestamp()),
                math.ceil(DATETIME_MAX.timestamp()))

    earliest = latest = None

    def range_fn(range_start, range_end):
        # Widen the enclosing range; never cancel the visit.
        nonlocal earliest, latest
        if earliest is None or range_start < earliest:
            earliest = range_start
        if latest is None or latest < range_end:
            latest = range_end
        return False

    def infinity_fn(range_start):
        # Infinite recurrence: the range is open-ended, so stop visiting.
        nonlocal earliest, latest
        if earliest is None or range_start < earliest:
            earliest = range_start
        latest = DATETIME_MAX
        return True

    _visit_time_ranges(vobject_item, tag, range_fn, infinity_fn)
    if earliest is None:
        earliest = DATETIME_MIN
    if latest is None:
        latest = DATETIME_MAX
    return (tag, math.floor(earliest.timestamp()),
            math.ceil(latest.timestamp()))
def name_from_path(path, collection):
"""Return Radicale item name from ``path``."""
path = path.strip("/") + "/"
@ -891,70 +1053,87 @@ def report(base_prefix, path, xml_request, collection):
root.findall("./%s" % _tag("C", "filter")) +
root.findall("./%s" % _tag("CR", "filter")))
for hreference in hreferences:
try:
name = name_from_path(hreference, collection)
except ValueError as e:
collection.logger.warning("Skipping invalid path %r in REPORT "
"request on %r: %s", hreference, path, e)
response = _item_response(base_prefix, hreference,
found_item=False)
multistatus.append(response)
continue
if name:
# Reference is an item
item = collection.get(name)
def retrieve_items(collection, hreferences, multistatus):
"""Retrieves all items that are referenced in ``hreferences`` from
``collection`` and adds 404 responses for missing and invalid items
to ``multistatus``."""
collection_requested = False
def get_names():
"""Extracts all names from references in ``hreferences`` and adds
404 responses for invalid references to ``multistatus``.
If the whole collections is referenced ``collection_requested``
gets set to ``True``."""
nonlocal collection_requested
for hreference in hreferences:
try:
name = name_from_path(hreference, collection)
except ValueError as e:
collection.logger.warning(
"Skipping invalid path %r in REPORT request on %r: %s",
hreference, path, e)
response = _item_response(base_prefix, hreference,
found_item=False)
multistatus.append(response)
continue
if name:
# Reference is an item
yield name
else:
# Reference is a collection
collection_requested = True
for name, item in collection.get_multi2(get_names()):
if not item:
response = _item_response(base_prefix, hreference,
uri = "/" + posixpath.join(collection.path, name)
response = _item_response(base_prefix, uri,
found_item=False)
multistatus.append(response)
continue
items = [item]
else:
# Reference is a collection
items = collection.pre_filtered_list(filters)
else:
yield item, False
if collection_requested:
yield from collection.get_all_filtered(filters)
for item in items:
if not item:
continue
if filters:
try:
match = (_comp_match
if collection.get_meta("tag") == "VCALENDAR"
else _prop_match)
if not all(match(item, filter_[0]) for filter_ in filters
if filter_):
continue
except Exception as e:
raise RuntimeError("Failed to filter item %r from %r: %s" %
(collection.path, item.href, e)) from e
for item, filters_matched in retrieve_items(collection, hreferences,
multistatus):
if filters and not filters_matched:
match = (
_comp_match if collection.get_meta("tag") == "VCALENDAR"
else _prop_match)
try:
if not all(match(item, filter_[0]) for filter_ in filters
if filter_):
continue
except Exception as e:
raise RuntimeError("Failed to filter item %r from %r: %s" %
(item.href, collection.path, e)) from e
found_props = []
not_found_props = []
found_props = []
not_found_props = []
for tag in props:
element = ET.Element(tag)
if tag == _tag("D", "getetag"):
element.text = item.etag
found_props.append(element)
elif tag == _tag("D", "getcontenttype"):
name = item.name.lower()
mimetype = (
"text/vcard" if name == "vcard" else "text/calendar")
element.text = "%s; component=%s" % (mimetype, name)
found_props.append(element)
elif tag in (
_tag("C", "calendar-data"),
_tag("CR", "address-data")):
element.text = item.serialize()
found_props.append(element)
else:
not_found_props.append(element)
for tag in props:
element = ET.Element(tag)
if tag == _tag("D", "getetag"):
element.text = item.etag
found_props.append(element)
elif tag == _tag("D", "getcontenttype"):
name = item.name.lower()
mimetype = (
"text/vcard" if name == "vcard" else "text/calendar")
element.text = "%s; component=%s" % (mimetype, name)
found_props.append(element)
elif tag in (
_tag("C", "calendar-data"),
_tag("CR", "address-data")):
element.text = item.serialize()
found_props.append(element)
else:
not_found_props.append(element)
uri = "/" + posixpath.join(collection.path, item.href)
multistatus.append(_item_response(
base_prefix, uri, found_props=found_props,
not_found_props=not_found_props, found_item=True))
uri = "/" + posixpath.join(collection.path, item.href)
multistatus.append(_item_response(
base_prefix, uri, found_props=found_props,
not_found_props=not_found_props, found_item=True))
return client.MULTI_STATUS, multistatus