From af2cc1169ace7154349518f7f709023eeb76ba95 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:23:19 +0200 Subject: [PATCH] Mention href for users of link_regex option --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de7b795..959fda0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +- Mention hrefs in IPD crawler for users of `link_regex` option ### Fixed - IPD crawler crashes on some sites diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 58e71f8..78fe0b1 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -45,7 +45,7 @@ class KitIpdFolder: def explain(self) -> None: log.explain_topic(f"Folder {self.name!r}") for file in self.files: - log.explain(f"File {file.name!r}") + log.explain(f"File {file.name!r} (href={file.url!r})") def __hash__(self) -> int: return self.name.__hash__() @@ -113,7 +113,7 @@ class KitIpdCrawler(HttpCrawler): else: file = self._extract_file(element) items.add(file) - log.explain_topic(f"Orphan file {file.name!r}") + log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items