Satisfy pylint a bit

This commit is contained in:
I-Al-Istannen 2020-04-22 01:37:34 +02:00
parent 8891041069
commit ac65b06a8e
2 changed files with 33 additions and 32 deletions

View File

@ -147,14 +147,14 @@ class IliasCrawler:
if "opencast" in str(img_tag["alt"]).lower(): if "opencast" in str(img_tag["alt"]).lower():
LOGGER.debug("Found video site: %r", url) LOGGER.debug("Found video site: %r", url)
return self._crawl_video(path, url) return self._crawl_video_directory(path, url)
# Assume it is a folder # Assume it is a folder
folder_name = link_element.getText() folder_name = link_element.getText()
folder_path = Path(path, folder_name) folder_path = Path(path, folder_name)
return self._crawl_folder(folder_path, self._abs_url_from_link(link_element)) return self._crawl_folder(folder_path, self._abs_url_from_link(link_element))
def _crawl_video(self, path: Path, url: str) -> List[IliasDownloadInfo]: def _crawl_video_directory(self, path: Path, url: str) -> List[IliasDownloadInfo]:
initial_soup = self._get_page(url, {}) initial_soup = self._get_page(url, {})
content_link: bs4.Tag = initial_soup.select_one("#tab_series a") content_link: bs4.Tag = initial_soup.select_one("#tab_series a")
video_list_soup = self._get_page( video_list_soup = self._get_page(
@ -169,37 +169,38 @@ class IliasCrawler:
results: List[IliasDownloadInfo] = [] results: List[IliasDownloadInfo] = []
for link in video_links: for link in video_links:
video_page_url = self._abs_url_from_link(link) results += self._crawl_single_video(path, link)
modification_string = link.parent.parent.parent.select_one(
"td.std:nth-child(6)"
).getText().strip()
modification_time = datetime.datetime.strptime(modification_string, "%d.%m.%Y - %H:%M")
title = link.parent.parent.parent.select_one(
"td.std:nth-child(3)"
).getText().strip()
video_page_soup = self._get_page(video_page_url, {})
regex: re.Pattern = re.compile(
r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE
)
json_match = regex.search(str(video_page_soup))
if json_match is None:
LOGGER.warning("Could not find json stream info for %r", url)
return []
json_str = json_match.group(1)
json_object = json.loads(json_str)
video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"]
results.append(IliasDownloadInfo(
Path(path, title), video_url, modification_time
))
return results return results
def _crawl_single_video(self, path: Path, link: bs4.Tag) -> List[IliasDownloadInfo]:
    """Turn one entry of a video listing into a download description.

    The video's own page is fetched and searched for the embedded JSON
    stream description; the first mp4 source of the first stream is used
    as the download URL.

    Args:
        path: Directory path under which the video should be placed.
        link: Anchor tag of the video inside the listing.

    Returns:
        A one-element list with the download info for the video, or an
        empty list if no stream JSON could be found on the video page.
    """
    page_url = self._abs_url_from_link(link)

    # Three levels up from the anchor is the element holding the
    # "td.std" cells (presumably the table row of this video).
    row = link.parent.parent.parent
    modified_text = row.select_one("td.std:nth-child(6)").getText().strip()
    modified_at = datetime.datetime.strptime(modified_text, "%d.%m.%Y - %H:%M")
    video_title = row.select_one("td.std:nth-child(3)").getText().strip()

    page_soup = self._get_page(page_url, {})

    # Capture everything from the "streams" object up to (but excluding)
    # the object that starts with "paella_config_file".
    stream_info = re.search(
        r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file",
        str(page_soup),
        re.IGNORECASE,
    )
    if stream_info is None:
        LOGGER.warning("Could not find json stream info for %r", page_url)
        return []

    streams = json.loads(stream_info.group(1))["streams"]
    download_url = streams[0]["sources"]["mp4"][0]["src"]
    return [IliasDownloadInfo(Path(path, video_title), download_url, modified_at)]
def _crawl_folder(self, path: Path, url: str) -> List[IliasDownloadInfo]: def _crawl_folder(self, path: Path, url: str) -> List[IliasDownloadInfo]:
soup = self._get_page(url, {}) soup = self._get_page(url, {})

View File

@ -1,9 +1,9 @@
"""Contains a downloader for ILIAS.""" """Contains a downloader for ILIAS."""
import datetime import datetime
from dataclasses import dataclass, field from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List from typing import List
import bs4 import bs4
import requests import requests