From 292e516297729f78b52c434f0dc715e9436ea643 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 24 Apr 2020 18:24:44 +0000 Subject: [PATCH] Change crawler and downloader output --- PFERD/ilias/crawler.py | 8 ++++---- PFERD/ilias/downloader.py | 2 +- PFERD/organizer.py | 2 +- PFERD/utils.py | 42 ++++++++++++++++++++++++--------------- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 216be6e..c36b7c7 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -162,10 +162,10 @@ class IliasCrawler: element_path = Path(parent_path, link_element.getText().strip()) if not self.dir_filter(element_path): - PRETTY.filtered_path(element_path, "user filter") + PRETTY.not_searching(element_path, "user filter") return [] - LOGGER.info("Searching %r", str(element_path)) + PRETTY.searching(element_path) found_parent: Optional[bs4.Tag] = None @@ -190,7 +190,7 @@ class IliasCrawler: # A forum if str(img_tag["src"]).endswith("frm.svg"): LOGGER.debug("Skipping forum at %r", url) - PRETTY.filtered_path(element_path, "forum") + PRETTY.not_searching(element_path, "forum") return [] # An exercise @@ -200,7 +200,7 @@ class IliasCrawler: if str(img_tag["src"]).endswith("icon_webr.svg"): LOGGER.debug("Skipping external link at %r", url) - PRETTY.filtered_path(element_path, "external link") + PRETTY.not_searching(element_path, "external link") return [] # Match the opencast video plugin diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py index fcae6ec..915d35a 100644 --- a/PFERD/ilias/downloader.py +++ b/PFERD/ilias/downloader.py @@ -58,7 +58,7 @@ def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> b if info.modification_date.timestamp() > resolved_mod_time_seconds: return True - PRETTY.filtered_path(info.path, "Local file had newer or equal modification time") + PRETTY.ignored_file(info.path, "local file has newer or equal modification time") return False diff --git a/PFERD/organizer.py b/PFERD/organizer.py index f46e69c..c624d23 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -56,7 +56,7 @@ class Organizer(Location): if dst_absolute.exists() and dst_absolute.is_file(): if filecmp.cmp(str(src_absolute), str(dst_absolute), shallow=False): # Bail out, nothing more to do - PRETTY.ignored_file(dst_absolute) + PRETTY.ignored_file(dst_absolute, "same file contents") self.mark(dst) return diff --git a/PFERD/utils.py b/PFERD/utils.py index 9026644..0714c29 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -86,39 +86,49 @@ class PrettyLogger: def __init__(self, logger: logging.Logger) -> None: self.logger = logger - def modified_file(self, file_name: PurePath) -> None: + def modified_file(self, path: PurePath) -> None: """ An existing file has changed. """ self.logger.info( - f"{Fore.MAGENTA}{Style.BRIGHT}Modified {str(file_name)!r}.{Style.RESET_ALL}" + f"{Fore.MAGENTA}{Style.BRIGHT}Modified {str(path)!r}.{Style.RESET_ALL}" ) - def new_file(self, file_name: PurePath) -> None: + def new_file(self, path: PurePath) -> None: """ A new file has been downloaded. """ self.logger.info( - f"{Fore.GREEN}{Style.BRIGHT}Created {str(file_name)!r}.{Style.RESET_ALL}") + f"{Fore.GREEN}{Style.BRIGHT}Created {str(path)!r}.{Style.RESET_ALL}" + ) - def ignored_file(self, file_name: PurePath) -> None: + def ignored_file(self, path: PurePath, reason: str) -> None: """ - Nothing in particular happened to this file or directory. - """ - - self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}") - - def filtered_path(self, path: PurePath, reason: str) -> None: - """ - A crawler filter rejected the given path. + File was not downloaded or modified. """ self.logger.info( - f"{Style.DIM}Not considering {str(path)!r} due to filter rules" - f" ({Style.NORMAL}{reason}{Style.DIM})." - f"{Style.RESET_ALL}" + f"{Style.DIM}Ignored {str(path)!r} " + f"({Style.NORMAL}{reason}{Style.DIM}).{Style.RESET_ALL}" + ) + + def searching(self, path: PurePath) -> None: + """ + A crawler searches a particular object. + """ + + self.logger.info(f"Searching {str(path)!r}") + + def not_searching(self, path: PurePath, reason: str) -> None: + """ + A crawler does not search a particular object. + """ + + self.logger.info( + f"{Style.DIM}Not searching {str(path)!r} " + f"({Style.NORMAL}{reason}{Style.DIM}).{Style.RESET_ALL}" ) def starting_synchronizer(