Compare commits

...

43 Commits

SHA1 Message Date
ba9215ebe8 Bump version 2020-11-18 10:09:45 +01:00
8ebf0eab16 Sort download summary 2020-11-17 21:36:04 +01:00
cd90a60dee Move "sanitize_windows_path" to PFERD.transform 2020-11-12 20:52:46 +01:00
98834c9c95 Bump version 2020-11-12 20:23:36 +01:00
55e9e719ad Sanitize "/" in ilias path names 2020-11-12 20:21:24 +01:00
a0ae9aee27 Sanitize individual path parts 2020-11-11 09:36:20 +01:00
1486a63854 Do not collapse directory structure when sanitizing 2020-11-10 22:53:47 +01:00
733e1ae136 Bump version 2020-11-10 20:50:31 +01:00
4ac51048c1 Use "_" as a replacement for illegal characters 2020-11-10 20:49:14 +01:00
f2aba970fd [sync_url] Sanitize path names on windows 2020-11-10 17:16:14 +01:00
9c4759103a Bump patch version 2020-11-05 11:25:06 +01:00
316b9d7bf4 Prevent too many retries when fetching an ILIAS page 2020-11-04 22:23:56 +01:00
6f30adcd22 Fix quote type in README 2020-11-04 22:13:08 +01:00
6f78fef604 Add quoting instructions to README 2020-11-04 22:08:33 +01:00
f830b42a36 Fix duplicate files in download summary 2020-11-04 21:49:35 +01:00
ef343dec7c Merge organizer download summaries 2020-11-04 15:06:58 +01:00
0da2fafcd8 Fix links outside tables 2020-11-04 14:46:15 +01:00
f4abe3197c Add ipd crawler 2020-11-03 21:15:40 +01:00
38d4f5b4c9 Do not fail only empty courses 2020-11-03 20:09:54 +01:00
9ea03bda3e Adjust release names 2020-10-30 18:14:02 +01:00
07de5bea8b Explain how to run sync_url on Mac 2020-10-30 17:53:55 +01:00
f0d572c110 Fix a few typos in release body 2020-10-30 17:32:04 +01:00
076067e22d Bump version 2020-10-30 17:28:34 +01:00
ebb6e63c5c Add MacOS to CI 2020-10-30 17:23:27 +01:00
0c3f35a2d2 Do not provide a shorthand for "no-videos" 2020-10-30 17:01:10 +01:00
521890ae78 Update README.md 2020-10-28 23:24:18 +01:00
3f7c73df80 Release new minor version 2020-10-07 09:32:17 +02:00
43100f69d5 Merge pull request #10 from Garmelon/sync-url
Add "Sync url" script from Christophe and release it automatically
2020-10-07 09:29:48 +02:00
d73c778b0a Add sync_url instructions to README 2020-10-06 17:50:28 +02:00
73c3eb0984 Add option to skip videos in sync_url 2020-10-06 17:20:47 +02:00
a519cbe05d Add sync_url workflow 2020-10-06 12:42:20 +02:00
b3ad9783c4 Ignore pyinstaller files 2020-10-06 11:43:20 +02:00
c1ccb6c53e Allow crawling videos with sync_url 2020-10-06 10:46:06 +02:00
51a713fa04 Allow crawling courses or folders with sync_url
Video folders do not work, if they are passed directly. Their containing
folder must be specified instead.
2020-09-28 20:00:01 +02:00
74ea039458 Fix a few lint errors and pferd quirks in sync_url 2020-09-28 19:42:59 +02:00
aaa6a2b6a4 Merge pull request #9 from TheChristophe/master
Add simple course-download-by-url script
2020-09-28 19:25:45 +02:00
e32a49480b Expose methods to look up course/element names by id / url 2020-09-28 19:16:52 +02:00
be65051f9d Support downloading folders in get-by-url script 2020-09-28 18:16:33 +02:00
3387bc5f20 Add simple course-download-by-url script 2020-09-28 17:49:36 +02:00
3f0ae729d6 Expand "is course" check to not download magazines or other weird things 2020-09-28 16:43:58 +02:00
8e8c1c031a Version 2.3.0 2020-09-03 21:47:10 +02:00
55678d7fee Pass string down to FileCookieJar
Some python versions just can't handle it *despite the documentation
stating they should*.
2020-08-12 09:09:14 +02:00
a57ee8b96b Add timeout to video downloads to work around requests IPv6 bug 2020-08-11 14:40:30 +02:00
13 changed files with 553 additions and 19 deletions

.github/workflows/package.yml (new file)

@@ -0,0 +1,74 @@
name: Package Application with Pyinstaller

on:
  push:
    branches:
      - "*"
    tags:
      - "v*"

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]

    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: "Install dependencies"
        run: "pip install setuptools pyinstaller rich requests beautifulsoup4 -f --upgrade"
      - name: "Install sync_url.py"
        run: "pyinstaller sync_url.py -F"
      - name: "Move artifact"
        run: "mv dist/sync_url* dist/sync_url-${{ matrix.os }}"
      - uses: actions/upload-artifact@v2
        with:
          name: "Pferd Sync URL"
          path: "dist/sync_url*"

  release:
    name: Release
    needs: [build]
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/')
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: "Checkout"
        uses: actions/checkout@v2
      - name: "Download artifacts"
        uses: actions/download-artifact@v2
        with:
          name: "Pferd Sync URL"
      - name: "look at folder structure"
        run: "ls -lah"
      - name: "Rename releases"
        run: "mv sync_url-macos-latest pferd_sync_url_mac && mv sync_url-ubuntu-latest pferd_sync_url_linux && mv sync_url-windows-latest pferd_sync_url.exe"
      - name: "Create release"
        uses: softprops/action-gh-release@v1
      - name: "Upload release artifacts"
        uses: softprops/action-gh-release@v1
        with:
          body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!"
          files: |
            pferd_sync_url_mac
            pferd_sync_url_linux
            pferd_sync_url.exe

.gitignore

@@ -8,3 +8,7 @@ build/
 .env
 .vscode
 ilias_cookies.txt
+
+# PyInstaller
+sync_url.spec
+dist/

PFERD/cookie_jar.py

@@ -22,7 +22,7 @@ class CookieJar:
         if cookie_file is None:
             self._cookies = LWPCookieJar()
         else:
-            self._cookies = LWPCookieJar(cookie_file)
+            self._cookies = LWPCookieJar(str(cookie_file.resolve()))

     @property
     def cookies(self) -> LWPCookieJar:

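For context on the change above: `LWPCookieJar` is a `FileCookieJar`, and on some Python versions its constructor chokes on a `pathlib.Path` even though the documentation suggests path-like objects should work (see the "Pass string down to FileCookieJar" commit). A minimal standard-library sketch of the safe pattern; the file name is only an example:

```
from http.cookiejar import LWPCookieJar
from pathlib import Path

cookie_file = Path("ilias_cookies.txt")  # example path

# Passing the resolved path as a plain string works on every supported
# Python version; handing over the Path object directly does not.
jar = LWPCookieJar(str(cookie_file.resolve()))
try:
    jar.load()  # first run: the file does not exist yet
except FileNotFoundError:
    pass
jar.save()
```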
PFERD/download_summary.py

@@ -5,6 +5,12 @@ from pathlib import Path
 from typing import List


+def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]:
+    tmp = list(set(first + second))
+    tmp.sort(key=lambda x: str(x.resolve()))
+    return tmp
+
+
 class DownloadSummary:
     """
     Keeps track of all new, modified or deleted files and provides a summary.
@@ -40,9 +46,9 @@ class DownloadSummary:
         """
         Merges ourselves with the passed summary. Modifies this object, but not the passed one.
         """
-        self._new_files += summary.new_files
-        self._modified_files += summary.modified_files
-        self._deleted_files += summary.deleted_files
+        self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files)
+        self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files)
+        self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files)

     def add_deleted_file(self, path: Path) -> None:
         """

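The `_mergeNoDuplicate` helper above is what removes duplicate entries when organizer summaries are merged. A small sketch of the effect, using a local copy of the helper and made-up paths:

```
from pathlib import Path
from typing import List


def merge_no_duplicate(first: List[Path], second: List[Path]) -> List[Path]:
    # Same idea as _mergeNoDuplicate: union the two lists, then sort by
    # resolved path so the summary output stays stable.
    tmp = list(set(first + second))
    tmp.sort(key=lambda x: str(x.resolve()))
    return tmp


a = [Path("Vorlesung/blatt01.pdf"), Path("Vorlesung/blatt02.pdf")]
b = [Path("Vorlesung/blatt02.pdf"), Path("Vorlesung/blatt03.pdf")]
print(merge_no_duplicate(a, b))  # blatt02.pdf appears only once
```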
PFERD/ilias/crawler.py

@@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__)
 PRETTY = PrettyLogger(LOGGER)


+def _sanitize_path_name(name: str) -> str:
+    return name.replace("/", "-")
+
+
 class IliasElementType(Enum):
     """
     The type of an ilias element.
@@ -116,6 +120,16 @@ class IliasCrawler:

         return urlunsplit((scheme, netloc, path, new_query_string, fragment))

+    def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]:
+        """
+        Crawls a given url *and all reachable elements in it*.
+
+        Args:
+            url {str} -- the *full* url to crawl
+        """
+        start_entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), url)
+        return self._iterate_entries_to_download_infos(start_entries)
+
     def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]:
         """
         Starts the crawl process for a course, yielding a list of elements to (potentially)
@@ -134,7 +148,7 @@ class IliasCrawler:

         if not self._is_course_id_valid(root_url, course_id):
             raise FatalException(
-                "Invalid course id? The URL the server returned did not contain my id."
+                "Invalid course id? I didn't find anything looking like a course!"
             )

         # And treat it as a folder
@@ -143,7 +157,34 @@ class IliasCrawler:

     def _is_course_id_valid(self, root_url: str, course_id: str) -> bool:
         response: requests.Response = self._session.get(root_url)
-        return course_id in response.url
+        # We were redirected ==> Non-existant ID
+        if course_id not in response.url:
+            return False
+
+        link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link")
+        if not link_element:
+            return False
+        # It wasn't a course but a category list, forum, etc.
+        return "crs_" in link_element.get("value")
+
+    def find_course_name(self, course_id: str) -> Optional[str]:
+        """
+        Returns the name of a given course. None if it is not a valid course
+        or it could not be found.
+        """
+        course_url = self._url_set_query_param(
+            self._base_url + "/goto.php", "target", f"crs_{course_id}"
+        )
+        return self.find_element_name(course_url)
+
+    def find_element_name(self, url: str) -> Optional[str]:
+        """
+        Returns the name of the element at the given URL, if it can find one.
+        """
+        focus_element: bs4.Tag = self._get_page(url, {}).find(id="il_mhead_t_focus")
+        if not focus_element:
+            return None
+        return focus_element.text

     def crawl_personal_desktop(self) -> List[IliasDownloadInfo]:
         """
@@ -208,13 +249,22 @@ class IliasCrawler:
         """
         soup = self._get_page(url, {})

+        if soup.find(id="headerimage"):
+            element: bs4.Tag = soup.find(id="headerimage")
+            if "opencast" in element.attrs["src"].lower():
+                PRETTY.warning(f"Switched to crawling a video at {folder_path}")
+                if not self.dir_filter(folder_path, IliasElementType.VIDEO_FOLDER):
+                    PRETTY.not_searching(folder_path, "user filter")
+                    return []
+                return self._crawl_video_directory(folder_path, url)
+
         result: List[IliasCrawlerEntry] = []

         # Fetch all links and throw them to the general interpreter
         links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle")
         for link in links:
             abs_url = self._abs_url_from_link(link)
-            element_path = Path(folder_path, link.getText().strip())
+            element_path = Path(folder_path, _sanitize_path_name(link.getText().strip()))
             element_type = self._find_type_from_link(element_path, link, abs_url)

             if element_type == IliasElementType.REGULAR_FILE:
@@ -331,7 +381,7 @@ class IliasCrawler:
         modification_date = demangle_date(modification_date_str)

         # Grab the name from the link text
-        name = link_element.getText()
+        name = _sanitize_path_name(link_element.getText())
         full_path = Path(path, name + "." + file_type)

         return [
@@ -462,7 +512,7 @@ class IliasCrawler:
             ).getText().strip()
             title += ".mp4"

-            video_path: Path = Path(parent_path, title)
+            video_path: Path = Path(parent_path, _sanitize_path_name(title))

             video_url = self._abs_url_from_link(link)
@@ -534,6 +584,7 @@ class IliasCrawler:
             # Two divs, side by side. Left is the name, right is the link ==> get left
             # sibling
             file_name = file_link.parent.findPrevious(name="div").getText().strip()
+            file_name = _sanitize_path_name(file_name)
             url = self._abs_url_from_link(file_link)

             LOGGER.debug("Found file %r at %r", file_name, url)
@@ -547,10 +598,17 @@ class IliasCrawler:

         return results

-    def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
+    def _get_page(self, url: str, params: Dict[str, Any],
+                  retry_count: int = 0) -> bs4.BeautifulSoup:
         """
         Fetches a page from ILIAS, authenticating when needed.
         """
+        if retry_count >= 4:
+            raise FatalException("Could not get a proper page after 4 tries. "
+                                 "Maybe your URL is wrong, authentication fails continuously, "
+                                 "your ILIAS connection is spotty or ILIAS is not well.")
+
         LOGGER.debug("Fetching %r", url)

         response = self._session.get(url, params=params)
@@ -571,7 +629,7 @@ class IliasCrawler:

             self._authenticator.authenticate(self._session)

-            return self._get_page(url, params)
+            return self._get_page(url, params, retry_count + 1)

     @staticmethod
     def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:

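A rough sketch of how the new crawler entry points fit together, mirroring what the `sync_url.py` script added in this compare does: look up a display name for the element behind a URL, then crawl everything reachable from it. The ILIAS URL below is a placeholder.

```
from pathlib import Path

from PFERD.cookie_jar import CookieJar
from PFERD.ilias import IliasCrawler, KitShibbolethAuthenticator

url = "https://ilias.studium.kit.edu/goto.php?target=fold_1234567"  # placeholder

cookie_jar = CookieJar(Path("ilias_cookies.txt"))
session = cookie_jar.create_session()
authenticator = KitShibbolethAuthenticator()

# base url, session, authenticator, directory filter (here: accept everything)
crawler = IliasCrawler("https://ilias.studium.kit.edu", session,
                       authenticator, lambda path, element_type: True)

cookie_jar.load_cookies()
name = crawler.find_element_name(url)     # None if nothing sensible was found
infos = crawler.recursive_crawl_url(url)  # list of IliasDownloadInfo
cookie_jar.save_cookies()
print(name, len(infos))
```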
PFERD/ilias/downloader.py

@@ -84,9 +84,13 @@ class IliasDownloader:
             session: requests.Session,
             authenticator: IliasAuthenticator,
             strategy: IliasDownloadStrategy,
+            timeout: int = 5
     ):
         """
         Create a new IliasDownloader.
+
+        The timeout applies to the download request only, as bwcloud uses IPv6
+        and requests has a problem with that: https://github.com/psf/requests/issues/5522
         """

         self._tmp_dir = tmp_dir
@@ -94,6 +98,7 @@ class IliasDownloader:
         self._session = session
         self._authenticator = authenticator
         self._strategy = strategy
+        self._timeout = timeout

     def download_all(self, infos: List[IliasDownloadInfo]) -> None:
         """
@@ -137,7 +142,7 @@ class IliasDownloader:
             PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/")
             return True

-        with self._session.get(url, stream=True) as response:
+        with self._session.get(url, stream=True, timeout=self._timeout) as response:
             content_type = response.headers["content-type"]
             has_content_disposition = "content-disposition" in response.headers

PFERD/ipd.py (new file)

@@ -0,0 +1,151 @@
"""
Utility functions and a scraper/downloader for the IPD pages.
"""
import datetime
import logging
import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List, Optional
from urllib.parse import urljoin

import bs4
import requests

from PFERD.errors import FatalException
from PFERD.utils import soupify

from .logging import PrettyLogger
from .organizer import Organizer
from .tmp_dir import TmpDir
from .transform import Transformable
from .utils import stream_to_path

LOGGER = logging.getLogger(__name__)
PRETTY = PrettyLogger(LOGGER)


@dataclass
class IpdDownloadInfo(Transformable):
    """
    Information about an ipd entry.
    """
    url: str
    modification_date: Optional[datetime.datetime]


IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool]


def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool:
    """
    Accepts new files or files with a more recent modification date.
    """
    resolved_file = organizer.resolve(info.path)
    if not resolved_file.exists():
        return True
    if not info.modification_date:
        PRETTY.ignored_file(info.path, "could not find modification time, file exists")
        return False

    resolved_mod_time_seconds = resolved_file.stat().st_mtime

    # Download if the info is newer
    if info.modification_date.timestamp() > resolved_mod_time_seconds:
        return True

    PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
    return False


class IpdCrawler:
    # pylint: disable=too-few-public-methods
    """
    A crawler for IPD pages.
    """

    def __init__(self, base_url: str):
        self._base_url = base_url

    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
        Create an absolute url from an <a> tag.
        """
        return urljoin(self._base_url, link_tag.get("href"))

    def crawl(self) -> List[IpdDownloadInfo]:
        """
        Crawls the playlist given in the constructor.
        """
        page = soupify(requests.get(self._base_url))

        items: List[IpdDownloadInfo] = []

        for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}):
            href: str = link.attrs.get("href")
            name = href.split("/")[-1]

            modification_date: Optional[datetime.datetime] = None
            try:
                enclosing_row: bs4.Tag = link.findParent(name="tr")
                if enclosing_row:
                    date_text = enclosing_row.find(name="td").text
                    modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y")
            except ValueError:
                modification_date = None

            items.append(IpdDownloadInfo(
                Path(name),
                url=self._abs_url_from_link(link),
                modification_date=modification_date
            ))

        return items


class IpdDownloader:
    """
    A downloader for ipd files.
    """

    def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy):
        self._tmp_dir = tmp_dir
        self._organizer = organizer
        self._strategy = strategy
        self._session = requests.session()

    def download_all(self, infos: List[IpdDownloadInfo]) -> None:
        """
        Download multiple files one after the other.
        """
        for info in infos:
            self.download(info)

    def download(self, info: IpdDownloadInfo) -> None:
        """
        Download a single file.
        """
        if not self._strategy(self._organizer, info):
            self._organizer.mark(info.path)
            return

        with self._session.get(info.url, stream=True) as response:
            if response.status_code == 200:
                tmp_file = self._tmp_dir.new_path()
                stream_to_path(response, tmp_file, info.path.name)
                dst_path = self._organizer.accept_file(tmp_file, info.path)

                if dst_path and info.modification_date:
                    os.utime(
                        dst_path,
                        times=(
                            math.ceil(info.modification_date.timestamp()),
                            math.ceil(info.modification_date.timestamp())
                        )
                    )
            elif response.status_code == 403:
                raise FatalException("Received 403. Are you not using the KIT VPN?")
            else:
                PRETTY.warning(f"Could not download file, got response {response.status_code}")

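A minimal usage sketch for the crawler half of the new module (the downloader additionally needs a `TmpDir` and an `Organizer`, which `Pferd.ipd_kit` wires up further down); the lecture-page URL is a placeholder:

```
from PFERD.ipd import IpdCrawler

# Placeholder URL of an IPD lecture page that links PDFs in a table.
page_url = "https://pp.ipd.kit.edu/lehre/WS202021/paradigmen/"

crawler = IpdCrawler(page_url)
for info in crawler.crawl():
    # info.path is the PDF's file name, info.url the absolute link,
    # info.modification_date the parsed date column (or None).
    print(info.path, info.url, info.modification_date)
```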
PFERD/organizer.py

@@ -124,6 +124,8 @@ class Organizer(Location):
         self._cleanup(self.path)

     def _cleanup(self, start_dir: Path) -> None:
+        if not start_dir.exists():
+            return
         paths: List[Path] = list(start_dir.iterdir())

         # Recursively clean paths

PFERD/pferd.py

@@ -14,6 +14,8 @@ from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                     IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                     KitShibbolethAuthenticator, download_modified_or_new)
+from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo,
+                  IpdDownloadStrategy, ipd_download_new_or_modified)
 from .location import Location
 from .logging import PrettyLogger, enable_logging
 from .organizer import Organizer
@@ -72,7 +74,8 @@ class Pferd(Location):
             dir_filter: IliasDirectoryFilter,
             transform: Transform,
             download_strategy: IliasDownloadStrategy,
-            clean: bool = True
+            timeout: int,
+            clean: bool = True,
     ) -> Organizer:
         # pylint: disable=too-many-locals
         cookie_jar = CookieJar(to_path(cookies) if cookies else None)
@@ -81,7 +84,8 @@ class Pferd(Location):
         organizer = Organizer(self.resolve(to_path(target)))

         crawler = IliasCrawler(base_url, session, authenticator, dir_filter)
-        downloader = IliasDownloader(tmp_dir, organizer, session, authenticator, download_strategy)
+        downloader = IliasDownloader(tmp_dir, organizer, session,
+                                     authenticator, download_strategy, timeout)

         cookie_jar.load_cookies()
         info = crawl_function(crawler)
@@ -112,6 +116,7 @@ class Pferd(Location):
             password: Optional[str] = None,
             download_strategy: IliasDownloadStrategy = download_modified_or_new,
             clean: bool = True,
+            timeout: int = 5,
     ) -> Organizer:
         """
         Synchronizes a folder with the ILIAS instance of the KIT.
@@ -137,6 +142,8 @@ class Pferd(Location):
                 be downloaded. Can save bandwidth and reduce the number of requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
+            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
+                             requests bug.
         """
         # This authenticator only works with the KIT ilias instance.
         authenticator = KitShibbolethAuthenticator(username=username, password=password)
@@ -152,6 +159,7 @@ class Pferd(Location):
             transform=transform,
             download_strategy=download_strategy,
             clean=clean,
+            timeout=timeout
         )

         self._download_summary.merge(organizer.download_summary)
@@ -175,6 +183,7 @@ class Pferd(Location):
             password: Optional[str] = None,
             download_strategy: IliasDownloadStrategy = download_modified_or_new,
             clean: bool = True,
+            timeout: int = 5,
     ) -> Organizer:
         """
         Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS
@@ -199,6 +208,8 @@ class Pferd(Location):
                 be downloaded. Can save bandwidth and reduce the number of requests.
                 (default: {download_modified_or_new})
             clean {bool} -- Whether to clean up when the method finishes.
+            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
+                             requests bug.
         """
         # This authenticator only works with the KIT ilias instance.
         authenticator = KitShibbolethAuthenticator(username=username, password=password)
@@ -214,12 +225,131 @@ class Pferd(Location):
             transform=transform,
             download_strategy=download_strategy,
             clean=clean,
+            timeout=timeout
         )

         self._download_summary.merge(organizer.download_summary)

         return organizer

+    @swallow_and_print_errors
+    def ilias_kit_folder(
+            self,
+            target: PathLike,
+            full_url: str,
+            dir_filter: IliasDirectoryFilter = lambda x, y: True,
+            transform: Transform = lambda x: x,
+            cookies: Optional[PathLike] = None,
+            username: Optional[str] = None,
+            password: Optional[str] = None,
+            download_strategy: IliasDownloadStrategy = download_modified_or_new,
+            clean: bool = True,
+            timeout: int = 5,
+    ) -> Organizer:
+        """
+        Synchronizes a folder with a given folder on the ILIAS instance of the KIT.
+
+        Arguments:
+            target {Path} -- the target path to write the data to
+            full_url {str} -- the full url of the folder/videos/course to crawl
+
+        Keyword Arguments:
+            dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the
+                crawler level, these directories and all of their content is skipped.
+                (default: {lambdax:True})
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambdax:x})
+            cookies {Optional[Path]} -- The path to store and load cookies from.
+                (default: {None})
+            username {Optional[str]} -- The SCC username. If none is given, it will prompt
+                the user. (default: {None})
+            password {Optional[str]} -- The SCC password. If none is given, it will prompt
+                the user. (default: {None})
+            download_strategy {DownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {download_modified_or_new})
+            clean {bool} -- Whether to clean up when the method finishes.
+            timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
+                             requests bug.
+        """
+        # This authenticator only works with the KIT ilias instance.
+        authenticator = KitShibbolethAuthenticator(username=username, password=password)
+        PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
+
+        if not full_url.startswith("https://ilias.studium.kit.edu"):
+            raise FatalException("Not a valid KIT ILIAS URL")
+
+        organizer = self._ilias(
+            target=target,
+            base_url="https://ilias.studium.kit.edu/",
+            crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url),
+            authenticator=authenticator,
+            cookies=cookies,
+            dir_filter=dir_filter,
+            transform=transform,
+            download_strategy=download_strategy,
+            clean=clean,
+            timeout=timeout
+        )
+
+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
+    @swallow_and_print_errors
+    def ipd_kit(
+            self,
+            target: Union[PathLike, Organizer],
+            url: str,
+            transform: Transform = lambda x: x,
+            download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified,
+            clean: bool = True
+    ) -> Organizer:
+        """
+        Synchronizes a folder with a DIVA playlist.
+
+        Arguments:
+            target {Union[PathLike, Organizer]} -- The organizer / target folder to use.
+            url {str} -- the url to the page
+
+        Keyword Arguments:
+            transform {Transform} -- A transformation function for the output paths. Return None
+                to ignore a file. (default: {lambdax:x})
+            download_strategy {DivaDownloadStrategy} -- A function to determine which files need to
+                be downloaded. Can save bandwidth and reduce the number of requests.
+                (default: {diva_download_new})
+            clean {bool} -- Whether to clean up when the method finishes.
+        """
+        tmp_dir = self._tmp_dir.new_subdir()
+
+        if target is None:
+            PRETTY.starting_synchronizer("None", "IPD", url)
+            raise FatalException("Got 'None' as target directory, aborting")
+
+        if isinstance(target, Organizer):
+            organizer = target
+        else:
+            organizer = Organizer(self.resolve(to_path(target)))
+
+        PRETTY.starting_synchronizer(organizer.path, "IPD", url)
+
+        elements: List[IpdDownloadInfo] = IpdCrawler(url).crawl()
+        transformed = apply_transform(transform, elements)
+
+        if self._test_run:
+            self._print_transformables(transformed)
+            return organizer
+
+        downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy)
+        downloader.download_all(transformed)
+
+        if clean:
+            organizer.cleanup()
+
+        self._download_summary.merge(organizer.download_summary)
+
+        return organizer
+
     @swallow_and_print_errors
     def diva_kit(
             self,
@@ -278,4 +408,6 @@ class Pferd(Location):
         if clean:
             organizer.cleanup()

+        self._download_summary.merge(organizer.download_summary)
+
         return organizer

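For orientation, a sketch of how the two new entry points might be called from an `example_config.py`-style script; the URLs and folder names are placeholders, not taken from this diff:

```
from pathlib import Path

from PFERD import Pferd
from PFERD.transform import sanitize_windows_path

pferd = Pferd(Path(__file__).parent)
pferd.enable_logging()

# Crawl a single ILIAS folder/course/video list by URL (placeholder URL).
pferd.ilias_kit_folder(
    target="Vorlesung",
    full_url="https://ilias.studium.kit.edu/goto.php?target=fold_1234567",
    cookies="ilias_cookies.txt",
    transform=sanitize_windows_path,
)

# Mirror the PDFs of an IPD lecture page (placeholder URL).
pferd.ipd_kit(
    target="Paradigmen",
    url="https://pp.ipd.kit.edu/lehre/WS202021/paradigmen/",
)
```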
PFERD/transform.py

@@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering
 scheme to another.
 """

+import os
+import re
 from dataclasses import dataclass
 from pathlib import PurePath
 from typing import Callable, List, Optional, TypeVar
@@ -45,7 +47,8 @@ def apply_transform(

 # Transform combinators

-keep = lambda path: path
+def keep(path: PurePath) -> Optional[PurePath]:
+    return path


 def attempt(*args: Transform) -> Transform:
     def inner(path: PurePath) -> Optional[PurePath]:
@@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform:
             return path.with_name(target.format(*groups))
         return None
     return inner
+
+
+def sanitize_windows_path(path: PurePath) -> Optional[PurePath]:
+    """
+    A small function to escape characters that are forbidden in windows path names.
+    This method is a no-op on other operating systems.
+    """
+    # Escape windows illegal path characters
+    if os.name == 'nt':
+        sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)]
+        return PurePath(*sanitized_parts)
+    return path

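Since `sanitize_windows_path` is a no-op everywhere except Windows (`os.name == 'nt'`), here is a small sketch that applies the same substitution unconditionally to show the effect; the file name is made up:

```
import re
from pathlib import PurePath


def _sanitize_like_windows(path: PurePath) -> PurePath:
    # Same substitution sanitize_windows_path applies when os.name == 'nt':
    # each forbidden character in every path part becomes "_".
    parts = [re.sub(r'[<>:"/|?]', "_", part) for part in path.parts]
    return PurePath(*parts)


print(_sanitize_like_windows(PurePath('Vorlesung/Was ist "Zeit"?.pdf')))
# Vorlesung/Was ist _Zeit__.pdf
```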
README.md

@@ -2,6 +2,7 @@
 **P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien

+- [Quickstart with `sync_url`](#quickstart-with-sync_url)
 - [Installation](#installation)
 - [Upgrading from 2.0.0 to 2.1.0+](#upgrading-from-200-to-210)
 - [Example setup](#example-setup)
@@ -12,6 +13,23 @@
   - [Transform combinators](#transform-combinators)
   - [A short, but commented example](#a-short-but-commented-example)

+## Quickstart with `sync_url`
+
+The `sync_url` program allows you to just synchronize a given ILIAS URL (of a
+course, a folder, your personal desktop, etc.) without any extra configuration
+or setting up. Download the program, open ILIAS, copy the URL from the address
+bar and pass it to sync_url.
+
+It bundles everything it needs in one executable and is easy to
+use, but doesn't expose all the configuration options and tweaks a full install
+does.
+
+1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
+2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols
+3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.
+
+If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x <file>`.
+If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/))
+
 ## Installation

 Ensure that you have at least Python 3.8 installed.
@@ -19,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
 To install PFERD or update your installation to the latest version, run this
 wherever you want to install or have already installed PFERD:
 ```
-$ pip install git+https://github.com/Garmelon/PFERD@v2.2.1
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
 ```

 The use of [venv] is recommended.
@@ -42,8 +60,8 @@ $ mkdir Vorlesungen
 $ cd Vorlesungen
 $ python3 -m venv .venv
 $ .venv/bin/activate
-$ pip install git+https://github.com/Garmelon/PFERD@v2.2.1
-$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.2.1/example_config.py
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5
+$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py
 $ python3 example_config.py
 $ deactivate
 ```

setup.py

@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 setup(
     name="PFERD",
-    version="2.2.1",
+    version="2.4.5",
     packages=find_packages(),
     install_requires=[
         "requests>=2.21.0",

sync_url.py (new file, executable)

@@ -0,0 +1,69 @@
#!/usr/bin/env python

"""
A simple script to download a course by name from ILIAS.
"""

import argparse
from pathlib import Path
from urllib.parse import urlparse

from PFERD import Pferd
from PFERD.cookie_jar import CookieJar
from PFERD.ilias import (IliasCrawler, IliasElementType,
                         KitShibbolethAuthenticator)
from PFERD.transform import sanitize_windows_path
from PFERD.utils import to_path


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-run", action="store_true")
    parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in")
    parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos")
    parser.add_argument('url', help="URL to the course page")
    parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into")
    args = parser.parse_args()

    url = urlparse(args.url)

    cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
    session = cookie_jar.create_session()
    authenticator = KitShibbolethAuthenticator()
    crawler = IliasCrawler(url.scheme + '://' + url.netloc, session,
                           authenticator, lambda x, y: True)

    cookie_jar.load_cookies()

    if args.folder is not None:
        folder = args.folder
        # Initialize pferd at the *parent of the passed folder*
        # This is needed so Pferd's internal protections against escaping the working directory
        # do not trigger (e.g. if somebody names a file in ILIAS '../../bad thing.txt')
        pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run)
    else:
        # fetch course name from ilias
        folder = crawler.find_element_name(args.url)
        cookie_jar.save_cookies()

        # Initialize pferd at the location of the script
        pferd = Pferd(Path(__file__).parent, test_run=args.test_run)

    def dir_filter(_: Path, element: IliasElementType) -> bool:
        if args.no_videos:
            return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER]
        return True

    pferd.enable_logging()
    # fetch
    pferd.ilias_kit_folder(
        target=folder,
        full_url=args.url,
        cookies=args.cookies,
        dir_filter=dir_filter,
        transform=sanitize_windows_path
    )


if __name__ == "__main__":
    main()