Display reason for ignoring an element in the ILIAS crawler

This commit is contained in:
I-Al-Istannen 2020-04-23 13:54:58 +02:00
parent dc964a9d98
commit 13bc78c889
2 changed files with 11 additions and 3 deletions

View File

@ -162,7 +162,7 @@ class IliasCrawler:
element_path = Path(parent_path, link_element.getText().strip()) element_path = Path(parent_path, link_element.getText().strip())
if not self.dir_filter(element_path): if not self.dir_filter(element_path):
PRETTY.filtered_path(element_path) PRETTY.filtered_path(element_path, "user filter")
return [] return []
LOGGER.info("Searching %r", str(element_path)) LOGGER.info("Searching %r", str(element_path))
@ -190,6 +190,7 @@ class IliasCrawler:
# A forum # A forum
if str(img_tag["src"]).endswith("frm.svg"): if str(img_tag["src"]).endswith("frm.svg"):
LOGGER.debug("Skipping forum at %r", url) LOGGER.debug("Skipping forum at %r", url)
PRETTY.filtered_path(element_path, "forum")
return [] return []
# An exercise # An exercise
@ -197,6 +198,11 @@ class IliasCrawler:
LOGGER.debug("Crawling exercises at %r", url) LOGGER.debug("Crawling exercises at %r", url)
return self._crawl_exercises(element_path, url) return self._crawl_exercises(element_path, url)
if str(img_tag["src"]).endswith("icon_webr.svg"):
LOGGER.debug("Skipping external link at %r", url)
PRETTY.filtered_path(element_path, "external link")
return []
# Match the opencast video plugin # Match the opencast video plugin
if "opencast" in str(img_tag["alt"]).lower(): if "opencast" in str(img_tag["alt"]).lower():
LOGGER.debug("Found video site: %r", url) LOGGER.debug("Found video site: %r", url)

View File

@ -145,13 +145,15 @@ class PrettyLogger:
self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}") self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}")
def filtered_path(self, path: Path) -> None: def filtered_path(self, path: Path, reason: str) -> None:
""" """
A crawler filter rejected the given path. A crawler filter rejected the given path.
""" """
self.logger.info( self.logger.info(
f"{Style.DIM}Not considering {str(path)!r} due to filter rules.{Style.RESET_ALL}" f"{Style.DIM}Not considering {str(path)!r} due to filter rules"
f" ({Style.NORMAL}{reason}{Style.DIM})."
f"{Style.RESET_ALL}"
) )
def starting_synchronizer( def starting_synchronizer(