mirror of
				https://github.com/Garmelon/PFERD.git
				synced 2025-11-03 22:23:41 +01:00 
			
		
		
		
	Download all TGI files and not just lectures
This commit is contained in:
		@@ -18,7 +18,6 @@ pretty = PrettyLogger(logger)
 | 
				
			|||||||
class TGI:
 | 
					class TGI:
 | 
				
			||||||
    CRAWL_URL = "https://i11www.iti.kit.edu/teaching/{year}/tgi/index"
 | 
					    CRAWL_URL = "https://i11www.iti.kit.edu/teaching/{year}/tgi/index"
 | 
				
			||||||
    BASE_URL = "https://i11www.iti.kit.edu"
 | 
					    BASE_URL = "https://i11www.iti.kit.edu"
 | 
				
			||||||
    LINK_RE = re.compile(r"^/_media/teaching/.*?/(tgi-\d+-\d+-)([^/]*\.pdf)$")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, base_path, year="winter2019"):
 | 
					    def __init__(self, base_path, year="winter2019"):
 | 
				
			||||||
        self.base_path = base_path
 | 
					        self.base_path = base_path
 | 
				
			||||||
@@ -49,11 +48,11 @@ class TGI:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        files = []
 | 
					        files = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for found in soup.find_all("a", href=self.LINK_RE):
 | 
					        for found in soup.select("a.mediafile.mf_pdf"):
 | 
				
			||||||
            url = found["href"]
 | 
					            url = found["href"]
 | 
				
			||||||
            full_url = self.BASE_URL + url
 | 
					            full_url = self.BASE_URL + url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            filename = re.search(self.LINK_RE, url).group(2)
 | 
					            filename = re.search(r"\d+(/tgi)?/(.+.pdf)", url).group(2)
 | 
				
			||||||
            path = pathlib.PurePath(filename)
 | 
					            path = pathlib.PurePath(filename)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            logger.debug(f"Found file {filename} at {full_url}")
 | 
					            logger.debug(f"Found file {filename} at {full_url}")
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user