From 1407c6d2641b2454398daeb29543a3fd6fd75a30 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen <i-al-istannen@users.noreply.github.com>
Date: Thu, 17 Oct 2019 22:14:32 +0200
Subject: [PATCH] Download all TGI files and not just lectures

---
 PFERD/tgi.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/PFERD/tgi.py b/PFERD/tgi.py
index 9661e9c..fcd8108 100644
--- a/PFERD/tgi.py
+++ b/PFERD/tgi.py
@@ -18,7 +18,6 @@ pretty = PrettyLogger(logger)
 class TGI:
     CRAWL_URL = "https://i11www.iti.kit.edu/teaching/{year}/tgi/index"
     BASE_URL = "https://i11www.iti.kit.edu"
-    LINK_RE = re.compile(r"^/_media/teaching/.*?/(tgi-\d+-\d+-)([^/]*\.pdf)$")
 
     def __init__(self, base_path, year="winter2019"):
         self.base_path = base_path
@@ -49,11 +48,11 @@ class TGI:
 
         files = []
 
-        for found in soup.find_all("a", href=self.LINK_RE):
+        for found in soup.select("a.mediafile.mf_pdf"):
             url = found["href"]
             full_url = self.BASE_URL + url
 
-            filename = re.search(self.LINK_RE, url).group(2)
+            filename = re.search(r"\d+(/tgi)?/(.+\.pdf)", url).group(2)
             path = pathlib.PurePath(filename)
 
             logger.debug(f"Found file {filename} at {full_url}")