mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Display reason for ignoring an element in ILIAS crawler
This commit is contained in:
parent
dc964a9d98
commit
13bc78c889
@ -162,7 +162,7 @@ class IliasCrawler:
|
||||
element_path = Path(parent_path, link_element.getText().strip())
|
||||
|
||||
if not self.dir_filter(element_path):
|
||||
PRETTY.filtered_path(element_path)
|
||||
PRETTY.filtered_path(element_path, "user filter")
|
||||
return []
|
||||
|
||||
LOGGER.info("Searching %r", str(element_path))
|
||||
@ -190,6 +190,7 @@ class IliasCrawler:
|
||||
# A forum
|
||||
if str(img_tag["src"]).endswith("frm.svg"):
|
||||
LOGGER.debug("Skipping forum at %r", url)
|
||||
PRETTY.filtered_path(element_path, "forum")
|
||||
return []
|
||||
|
||||
# An exercise
|
||||
@ -197,6 +198,11 @@ class IliasCrawler:
|
||||
LOGGER.debug("Crawling exercises at %r", url)
|
||||
return self._crawl_exercises(element_path, url)
|
||||
|
||||
if str(img_tag["src"]).endswith("icon_webr.svg"):
|
||||
LOGGER.debug("Skipping external link at %r", url)
|
||||
PRETTY.filtered_path(element_path, "external link")
|
||||
return []
|
||||
|
||||
# Match the opencast video plugin
|
||||
if "opencast" in str(img_tag["alt"]).lower():
|
||||
LOGGER.debug("Found video site: %r", url)
|
||||
|
@ -145,13 +145,15 @@ class PrettyLogger:
|
||||
|
||||
self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}")
|
||||
|
||||
def filtered_path(self, path: Path) -> None:
|
||||
def filtered_path(self, path: Path, reason: str) -> None:
|
||||
"""
|
||||
A crawler filter rejected the given path.
|
||||
"""
|
||||
|
||||
self.logger.info(
|
||||
f"{Style.DIM}Not considering {str(path)!r} due to filter rules.{Style.RESET_ALL}"
|
||||
f"{Style.DIM}Not considering {str(path)!r} due to filter rules"
|
||||
f" ({Style.NORMAL}{reason}{Style.DIM})."
|
||||
f"{Style.RESET_ALL}"
|
||||
)
|
||||
|
||||
def starting_synchronizer(
|
||||
|
Loading…
Reference in New Issue
Block a user