Bump patch version

Prevent too many retries when fetching an ILIAS page
Fix quote type in README
2023-12-21 10:23:01 +01:00 · 2020-11-05 11:25:06 +01:00 · 2020-11-04 22:23:56 +01:00 · 2020-11-04 22:13:08 +01:00 · 2020-11-04 22:08:33 +01:00 · 2020-11-04 21:49:35 +01:00
8 changed files with 230 additions and 11 deletions
--- a/.github/workflows/package.yml
+++ b/.github/workflows/package.yml
@ -67,7 +67,7 @@ jobs:
    - name: "Upload release artifacts"
      uses: softprops/action-gh-release@v1
      with:
-        body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`."
+        body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x <file>`. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!"
        files: |
          pferd_sync_url_mac
          pferd_sync_url_linux
--- a/PFERD/download_summary.py
+++ b/PFERD/download_summary.py
@ -40,9 +40,9 @@ class DownloadSummary:
        """
        Merges ourselves with the passed summary. Modifies this object, but not the passed one.
        """
-        self._new_files += summary.new_files
+        self._new_files = list(set(self._new_files + summary.new_files))
-        self._modified_files += summary.modified_files
+        self._modified_files = list(set(self._modified_files + summary.modified_files))
-        self._deleted_files += summary.deleted_files
+        self._deleted_files = list(set(self._deleted_files + summary.deleted_files))
    def add_deleted_file(self, path: Path) -> None:
        """
--- a/PFERD/ilias/crawler.py
+++ b/PFERD/ilias/crawler.py
@ -593,10 +593,17 @@ class IliasCrawler:
        return results
-    def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup:
+    def _get_page(self, url: str, params: Dict[str, Any],
                  retry_count: int = 0) -> bs4.BeautifulSoup:
        """
        Fetches a page from ILIAS, authenticating when needed.
        """
        if retry_count >= 4:
            raise FatalException("Could not get a proper page after 4 tries. "
                                 "Maybe your URL is wrong, authentication fails continuously, "
                                 "your ILIAS connection is spotty or ILIAS is not well.")
        LOGGER.debug("Fetching %r", url)
        response = self._session.get(url, params=params)
@ -617,7 +624,7 @@ class IliasCrawler:
        self._authenticator.authenticate(self._session)
-        return self._get_page(url, params)
+        return self._get_page(url, params, retry_count + 1)
    @staticmethod
    def _is_logged_in(soup: bs4.BeautifulSoup) -> bool:
--- a/PFERD/ipd.py
+++ b/PFERD/ipd.py
@ -0,0 +1,151 @@
 """
 Utility functions and a scraper/downloader for the IPD pages.
 """
 import datetime
 import logging
 import math
 import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Callable, List, Optional
 from urllib.parse import urljoin
 import bs4
 import requests
 from PFERD.errors import FatalException
 from PFERD.utils import soupify
 from .logging import PrettyLogger
 from .organizer import Organizer
 from .tmp_dir import TmpDir
 from .transform import Transformable
 from .utils import stream_to_path
 # Module-level logger, plus the PrettyLogger wrapper around it that this
 # module uses to report ignored files and download warnings.
 LOGGER = logging.getLogger(__name__)
 PRETTY = PrettyLogger(LOGGER)
@dataclass
 class IpdDownloadInfo(Transformable):
    """
    Information about an ipd entry.

    Describes a single downloadable file found by the IPD crawler. Besides the
    fields below, instances are created with a path as first positional
    argument; that field is presumably declared by Transformable (not visible
    here -- confirm against PFERD.transform).
    """
    # Absolute URL the file is downloaded from.
    url: str
    # Date associated with the file on the crawled page, or None if no date
    # could be determined.
    modification_date: Optional[datetime.datetime]
 # A download strategy receives the target organizer and the info of a single
 # file and returns True if that file should be downloaded.
 IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool]
 def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool:
    """
    Download strategy that accepts files which do not exist locally yet or whose
    remote modification date is strictly newer than the local file's mtime.

    Every rejected file is reported through the pretty logger together with the
    reason for skipping it.
    """
    local_file = organizer.resolve(info.path)

    # Nothing on disk yet: always download.
    if not local_file.exists():
        return True

    remote_date = info.modification_date
    if not remote_date:
        # Without a remote date there is nothing to compare against, keep the local copy.
        PRETTY.ignored_file(info.path, "could not find modification time, file exists")
        return False

    local_mtime = local_file.stat().st_mtime
    if remote_date.timestamp() <= local_mtime:
        PRETTY.ignored_file(info.path, "local file has newer or equal modification time")
        return False

    # The remote file is newer than what we have.
    return True
 class IpdCrawler:
    # pylint: disable=too-few-public-methods
    """
    A crawler for IPD pages.

    Collects every link on a single page whose href ends with "pdf", together
    with the date found in the first cell of the table row enclosing the link
    (if there is one).
    """

    def __init__(self, base_url: str):
        """
        Arguments:
            base_url {str} -- the url of the page to crawl. Relative links on
                the page are resolved against it.
        """
        self._base_url = base_url

    def _abs_url_from_link(self, link_tag: bs4.Tag) -> str:
        """
        Create an absolute url from an <a> tag.
        """
        return urljoin(self._base_url, link_tag.get("href"))

    @staticmethod
    def _find_modification_date(link_tag: bs4.Tag) -> Optional[datetime.datetime]:
        """
        Return the date in the first <td> of the row enclosing the link, or
        None if the link is not in a row, the row has no <td> or the cell text
        is not a "%d.%m.%Y" date.
        """
        enclosing_row = link_tag.findParent(name="tr")
        if not enclosing_row:
            # Links outside of tables carry no date.
            return None

        date_cell = enclosing_row.find(name="td")
        if date_cell is None:
            # E.g. a header row that only consists of <th> cells.
            return None

        try:
            # Cell text may be padded with whitespace/newlines by the markup.
            return datetime.datetime.strptime(date_cell.text.strip(), "%d.%m.%Y")
        except ValueError:
            return None

    def crawl(self) -> List[IpdDownloadInfo]:
        """
        Crawls the page given in the constructor.

        Returns:
            One IpdDownloadInfo per pdf link found. The path of each entry is
            the last segment of the link's href.
        """
        page = soupify(requests.get(self._base_url))

        items: List[IpdDownloadInfo] = []

        for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}):
            href: str = link.attrs.get("href")
            name = href.split("/")[-1]

            items.append(IpdDownloadInfo(
                Path(name),
                url=self._abs_url_from_link(link),
                modification_date=self._find_modification_date(link)
            ))

        return items
 class IpdDownloader:
    """
    Fetches the files described by IpdDownloadInfo objects and hands them over
    to an organizer.
    """

    def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy):
        """
        Arguments:
            tmp_dir {TmpDir} -- where downloads are staged before the organizer accepts them
            organizer {Organizer} -- receives the downloaded files
            strategy {IpdDownloadStrategy} -- decides whether a file is downloaded at all
        """
        self._session = requests.session()
        self._strategy = strategy
        self._organizer = organizer
        self._tmp_dir = tmp_dir

    def download_all(self, infos: List[IpdDownloadInfo]) -> None:
        """
        Download every given file, sequentially and in the given order.
        """
        for entry in infos:
            self.download(entry)

    def download(self, info: IpdDownloadInfo) -> None:
        """
        Download a single file.

        Files rejected by the strategy are only marked in the organizer. A 403
        response raises a FatalException, any other non-200 response is logged
        as a warning and the file is skipped.
        """
        wanted = self._strategy(self._organizer, info)
        if not wanted:
            self._organizer.mark(info.path)
            return

        with self._session.get(info.url, stream=True) as response:
            if response.status_code == 403:
                raise FatalException("Received 403. Are you not using the KIT VPN?")

            if response.status_code != 200:
                PRETTY.warning(f"Could not download file, got response {response.status_code}")
                return

            tmp_file = self._tmp_dir.new_path()
            stream_to_path(response, tmp_file, info.path.name)
            dst_path = self._organizer.accept_file(tmp_file, info.path)

            if not dst_path or not info.modification_date:
                return

            # Stamp the accepted file with the remote date (rounded up to whole
            # seconds) as both access and modification time.
            stamp = math.ceil(info.modification_date.timestamp())
            os.utime(dst_path, times=(stamp, stamp))
--- a/PFERD/organizer.py
+++ b/PFERD/organizer.py
@ -124,6 +124,8 @@ class Organizer(Location):
        self._cleanup(self.path)
    def _cleanup(self, start_dir: Path) -> None:
        if not start_dir.exists():
            return
        paths: List[Path] = list(start_dir.iterdir())
        # Recursively clean paths
--- a/PFERD/pferd.py
+++ b/PFERD/pferd.py
@ -14,6 +14,8 @@ from .errors import FatalException, swallow_and_print_errors
 from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter,
                    IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy,
                    KitShibbolethAuthenticator, download_modified_or_new)
 from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo,
                  IpdDownloadStrategy, ipd_download_new_or_modified)
 from .location import Location
 from .logging import PrettyLogger, enable_logging
 from .organizer import Organizer
@ -294,6 +296,60 @@ class Pferd(Location):
        return organizer
    @swallow_and_print_errors
    def ipd_kit(
            self,
            target: Union[PathLike, Organizer],
            url: str,
            transform: Transform = lambda x: x,
            download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified,
            clean: bool = True
    ) -> Organizer:
        """
        Synchronizes a folder with the files linked on an IPD page.

        Arguments:
            target {Union[PathLike, Organizer]} -- The organizer / target folder to use.
            url {str} -- the url to the page

        Keyword Arguments:
            transform {Transform} -- A transformation function for the output paths. Return None
                to ignore a file. (default: {lambda x: x})
            download_strategy {IpdDownloadStrategy} -- A function to determine which files need to
                be downloaded. Can save bandwidth and reduce the number of requests.
                (default: {ipd_download_new_or_modified})
            clean {bool} -- Whether to clean up when the method finishes. (default: {True})

        Returns:
            The organizer that was used. In a test run the crawled entries are only
            printed; nothing is downloaded, cleaned up or merged into the summary.
        """
        tmp_dir = self._tmp_dir.new_subdir()

        if target is None:
            PRETTY.starting_synchronizer("None", "IPD", url)
            raise FatalException("Got 'None' as target directory, aborting")

        # Reuse a passed organizer, otherwise create one for the resolved target folder.
        organizer = (
            target if isinstance(target, Organizer)
            else Organizer(self.resolve(to_path(target)))
        )

        PRETTY.starting_synchronizer(organizer.path, "IPD", url)

        crawled: List[IpdDownloadInfo] = IpdCrawler(url).crawl()
        to_download = apply_transform(transform, crawled)

        if not self._test_run:
            IpdDownloader(
                tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy
            ).download_all(to_download)

            if clean:
                organizer.cleanup()

            self._download_summary.merge(organizer.download_summary)
        else:
            self._print_transformables(to_download)

        return organizer
    @swallow_and_print_errors
    def diva_kit(
            self,
@ -352,4 +408,6 @@ class Pferd(Location):
        if clean:
            organizer.cleanup()
        self._download_summary.merge(organizer.download_summary)
        return organizer
--- a/README.md
+++ b/README.md
@ -25,7 +25,8 @@ use, but doesn't expose all the configuration options and tweaks a full install
 does.
 1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest).
-2. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.  
+2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols.
 3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option.  
  If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x <file>`.  
  If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/))
@ -36,7 +37,7 @@ Ensure that you have at least Python 3.8 installed.
 To install PFERD or update your installation to the latest version, run this
 wherever you want to install or have already installed PFERD:
 ```
-$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
 ```
 The use of [venv] is recommended.
@ -59,8 +60,8 @@ $ mkdir Vorlesungen
 $ cd Vorlesungen
 $ python3 -m venv .venv
 $ .venv/bin/activate
-$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1
+$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2
-$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.1/example_config.py
+$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py
 $ python3 example_config.py
 $ deactivate
 ```
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 setup(
    name="PFERD",
-    version="2.4.1",
+    version="2.4.2",
    packages=find_packages(),
    install_requires=[
        "requests>=2.21.0",
Author	SHA1	Message	Date
I-Al-Istannen	9c4759103a	Bump patch version	2020-11-05 11:25:06 +01:00
I-Al-Istannen	316b9d7bf4	Prevent too many retries when fetching an ILIAS page	2020-11-04 22:23:56 +01:00
I-Al-Istannen	6f30adcd22	Fix quote type in README	2020-11-04 22:13:08 +01:00
I-Al-Istannen	6f78fef604	Add quoting instructions to README	2020-11-04 22:08:33 +01:00
I-Al-Istannen	f830b42a36	Fix duplicate files in download summary	2020-11-04 21:49:35 +01:00
I-Al-Istannen	ef343dec7c	Merge organizer download summaries	2020-11-04 15:06:58 +01:00
I-Al-Istannen	0da2fafcd8	Fix links outside tables	2020-11-04 14:46:15 +01:00
I-Al-Istannen	f4abe3197c	Add ipd crawler	2020-11-03 21:15:40 +01:00
I-Al-Istannen	38d4f5b4c9	Do not fail only empty courses	2020-11-03 20:09:54 +01:00