Also crawl .c/.java/.zip from IPD page

This commit is contained in:
I-Al-Istannen 2021-02-09 12:30:59 +01:00
parent 9a9018751e
commit 946b7a7931

View File

@@ -82,7 +82,10 @@ class IpdCrawler:
items: List[IpdDownloadInfo] = [] items: List[IpdDownloadInfo] = []
for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}): def is_relevant_url(x: str) -> bool:
return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip")
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
href: str = link.attrs.get("href") href: str = link.attrs.get("href")
name = href.split("/")[-1] name = href.split("/")[-1]