mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Also crawl .c/.java/.zip from IPD page
This commit is contained in:
parent
9a9018751e
commit
946b7a7931
@ -82,7 +82,10 @@ class IpdCrawler:
|
||||
|
||||
items: List[IpdDownloadInfo] = []
|
||||
|
||||
for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}):
|
||||
def is_relevant_url(x: str) -> bool:
    """Return True if the link target *x* ends with a file extension to crawl.

    The accepted suffixes are ``.pdf``, ``.c``, ``.java`` and ``.zip``; the
    comparison is case-sensitive.
    """
    # str.endswith accepts a tuple of suffixes, so one call replaces the
    # chain of `or`-ed endswith checks.
    return x.endswith((".pdf", ".c", ".java", ".zip"))
|
||||
|
||||
for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}):
|
||||
href: str = link.attrs.get("href")
|
||||
name = href.split("/")[-1]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user