import logging
import mimetypes
import os
import re
from string import Template
from threading import Lock
from urllib.parse import urljoin

import PIL.Image
import PIL.ImageOps
import requests
import youtube_dl
from django.conf import settings as srv_settings

from YtManagerApp.models import Subscription, Video
from YtManagerApp.models import VIDEO_ORDER_MAPPING
from YtManagerApp.providers.video_provider import VideoProvider
from YtManagerApp.scheduler.job import Job
from YtManagerApp.utils import first_non_null

log = logging.getLogger('DownloadManager')

# Seconds to wait for the thumbnail server before giving up; without a timeout
# requests.get() may block indefinitely.
_THUMBNAIL_REQUEST_TIMEOUT = 30


class DownloadVideoJob(Job):
    """
    Downloads a video to the disk
    """
    name = "DownloadVideoJob"
    # Shared by all instances: serializes downloads (see the comment in run()).
    __lock = Lock()

    def __init__(self, job_execution, video: Video, attempt: int = 1):
        """
        :param job_execution: forwarded unchanged to the Job base class
        :param video: the video to download
        :param attempt: 1-based number of the current download attempt
        """
        super().__init__(job_execution)
        self._video = video
        self._attempt = attempt
        self._log_youtube_dl = self.log.getChild('youtube_dl')

    def get_description(self):
        """
        Builds a human readable description of this job; the attempt number is appended
        for every attempt after the first one.
        """
        ret = "Downloading video " + self._video.name
        if self._attempt > 1:
            ret += f" (attempt {self._attempt})"
        return ret

    def run(self):
        """
        Downloads the video using youtube-dl.

        On success, the output path and the total size of the downloaded files are stored on
        the video. On failure, the download is re-enqueued with an incremented attempt number
        until the user's 'max_download_attempts' preference is exceeded, at which point the
        downloaded path is set to an empty string.
        """
        from YtManagerApp.services import Services

        # Issue: if multiple videos are downloaded at the same time, a race condition appears in the mkdirs() call that
        # youtube-dl makes, which causes it to fail with the error 'Cannot create folder - file already exists'.
        # For now, allow a single download instance.
        with self.__lock:
            user = self._video.subscription.user
            provider: VideoProvider = Services.videoProviderManager().get(self._video)
            max_attempts = user.preferences['max_download_attempts']

            youtube_dl_params, output_path = self.__build_youtube_dl_params(self._video)
            with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
                ret = yt.download([provider.get_video_url(self._video)])

            self.log.info('Download finished with code %d', ret)

            if ret == 0:
                self._video.downloaded_path = output_path
                self._video.save()
                self.log.info('Video %d [%s %s] downloaded successfully!',
                              self._video.id, self._video.video_id, self._video.name)

                # update size
                self._video.downloaded_size = sum(os.stat(file).st_size for file in self._video.get_files())
                self._video.save()

            # NOTE(review): with '<=' a job whose attempt number equals max_attempts is still
            # re-enqueued, so up to max_attempts + 1 downloads are made in total - confirm whether
            # the preference is meant to count attempts or retries.
            elif self._attempt <= max_attempts:
                self.log.warning('Re-enqueueing video (attempt %d/%d)', self._attempt, max_attempts)
                Services.videoManager().download(self._video, self._attempt + 1)

            else:
                self.log.error('Multiple attempts to download video %d [%s %s] failed!',
                               self._video.id, self._video.video_id, self._video.name)
                self._video.downloaded_path = ''
                self._video.save()

    def __build_youtube_dl_params(self, video: Video):
        """
        Builds the youtube-dl options from the preferences of the user owning the video.

        :return: tuple (youtube-dl options dictionary, normalized output path)
        """
        sub = video.subscription
        user = sub.user

        # resolve path
        download_path = user.preferences['download_path']

        template_dict = self.__build_template_dict(video)
        output_pattern = Template(user.preferences['download_file_pattern']).safe_substitute(template_dict)

        output_path = os.path.join(download_path, output_pattern)
        output_path = os.path.normpath(output_path)

        youtube_dl_params = {
            'logger': self._log_youtube_dl,
            'format': user.preferences['download_format'],
            'outtmpl': output_path,
            'writethumbnail': True,
            'writedescription': True,
            'writesubtitles': user.preferences['download_subtitles'],
            'writeautomaticsub': user.preferences['download_autogenerated_subtitles'],
            'allsubtitles': user.preferences['download_subtitles_all'],
            'merge_output_format': 'mp4',
            'postprocessors': [
                {
                    'key': 'FFmpegMetadata'
                },
            ]
        }

        # ''.split(',') yields [''], so blank entries must be dropped explicitly; otherwise
        # an empty preference would be passed to youtube-dl as the language list [''].
        sub_langs = [lang.strip() for lang in user.preferences['download_subtitles_langs'].split(',')]
        sub_langs = [lang for lang in sub_langs if lang]
        if sub_langs:
            youtube_dl_params['subtitleslangs'] = sub_langs

        sub_format = user.preferences['download_subtitles_format']
        if sub_format:
            youtube_dl_params['subtitlesformat'] = sub_format

        return youtube_dl_params, output_path

    def __build_template_dict(self, video: Video):
        """
        Builds the values which can be substituted in the 'download_file_pattern' preference.
        """
        return {
            'channel': video.subscription.channel_name,
            'channel_id': video.subscription.channel_id,
            'playlist': video.subscription.name,
            'playlist_id': video.subscription.playlist_id,
            # playlist_index is incremented so the substituted number starts at 001
            'playlist_index': "{:03d}".format(1 + video.playlist_index),
            'title': video.name,
            'id': video.video_id,
        }

    def __get_valid_path(self, path: str):
        """
        Normalizes string (NFKD), drops non-ASCII characters, removes the characters : " *
        and replaces the characters ? < > | with '#'.
        """
        import unicodedata
        value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[:"*]', '', value).strip()
        value = re.sub(r'[?<>|]', '#', value)
        return value


class DownloadManager(object):
    """
    Decides which videos should be downloaded, enqueues the download jobs and
    fetches thumbnails into the media directory.
    """

    def __init__(self):
        pass

    def download_video(self, video: Video, attempt: int = 1):
        """
        Schedules a DownloadVideoJob for the given video.

        :param video: the video to download
        :param attempt: 1-based number of the download attempt
        """
        from YtManagerApp.services import Services
        Services.scheduler().add_job(DownloadVideoJob, args=[video, attempt])

    def __get_subscription_config(self, sub: Subscription):
        """
        Resolves the download settings of a subscription; first_non_null is given the
        subscription's own value first and the owning user's preference second.

        :return: tuple (enabled, global_limit, limit, order)
        """
        user = sub.user

        enabled = first_non_null(sub.auto_download, user.preferences['auto_download'])
        global_limit = user.preferences['download_global_limit']
        limit = first_non_null(sub.download_limit, user.preferences['download_subscription_limit'])
        order = first_non_null(sub.download_order, user.preferences['download_order'])
        order = VIDEO_ORDER_MAPPING[order]

        return enabled, global_limit, limit, order

    def process_subscription(self, sub: Subscription):
        """
        Enqueues downloads for the unwatched videos of a subscription that have no downloaded
        path, honoring the global and per-subscription limits (a limit is applied only when
        it is greater than 0).
        """
        from YtManagerApp.services import Services

        log.info('Processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

        enabled, global_limit, limit, order = self.__get_subscription_config(sub)
        log.info('Determined settings enabled=%s global_limit=%d limit=%d order="%s"',
                 enabled, global_limit, limit, order)

        if enabled:
            videos_to_download = Video.objects\
                .filter(subscription=sub, downloaded_path__isnull=True, watched=False)\
                .order_by(order)

            log.info('%d download candidates.', len(videos_to_download))

            if global_limit > 0:
                global_downloaded = Video.objects.filter(subscription__user=sub.user,
                                                         downloaded_path__isnull=False).count()
                allowed_count = max(global_limit - global_downloaded, 0)
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Global limit is set, can only download up to %d videos.', allowed_count)

            if limit > 0:
                sub_downloaded = Video.objects.filter(subscription=sub, downloaded_path__isnull=False).count()
                allowed_count = max(limit - sub_downloaded, 0)
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Limit is set, can only download up to %d videos.', allowed_count)

            # enqueue download
            for video in videos_to_download:
                log.info('Enqueuing video %d [%s %s] index=%d',
                         video.id, video.video_id, video.name, video.playlist_index)
                Services.videoManager().download(video)

        log.info('Finished processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

    def process_all_subscriptions(self):
        """
        Runs process_subscription for every subscription in the database.
        """
        for subscription in Subscription.objects.all():
            self.process_subscription(subscription)

    def fetch_thumbnail(self, url, object_type, identifier, thumb_size):
        """
        Downloads a thumbnail, resizes and crops it to thumb_size and stores it in
        MEDIA_ROOT/thumbs/<object_type>/<identifier><extension>.

        :param url: address of the original thumbnail
        :param object_type: name of the sub-directory of 'thumbs' where the image is stored
        :param identifier: file name (without extension) of the stored image
        :param thumb_size: requested size, passed to PIL.ImageOps.fit
        :return: the media URL of the stored thumbnail, or the original url if the
                 thumbnail could not be fetched or stored
        """
        log.info('Fetching thumbnail url=%s object_type=%s identifier=%s', url, object_type, identifier)

        # Make request to obtain mime type
        try:
            response = requests.get(url, stream=True, timeout=_THUMBNAIL_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
            return url

        try:
            # The header may be missing or carry parameters (e.g. "image/jpeg; charset=binary"),
            # neither of which guess_extension can handle.
            content_type = response.headers.get('Content-Type', '').split(';')[0].strip()
            ext = mimetypes.guess_extension(content_type)
            if not ext:
                log.error('Could not determine the file type of thumbnail %s (Content-Type: "%s").',
                          url, content_type)
                return url

            # Build file path
            file_name = f"{identifier}{ext}"
            abs_path_dir = os.path.join(srv_settings.MEDIA_ROOT, "thumbs", object_type)
            abs_path = os.path.join(abs_path_dir, file_name)
            # Keep the temporary file next to the final one instead of the working directory
            abs_path_tmp = abs_path + '.tmp'

            # Store image
            try:
                os.makedirs(abs_path_dir, exist_ok=True)
                with open(abs_path_tmp, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)

                # Resize and crop to thumbnail size
                with PIL.Image.open(abs_path_tmp) as image:
                    thumbnail = PIL.ImageOps.fit(image, thumb_size)
                    try:
                        thumbnail.save(abs_path)
                    finally:
                        thumbnail.close()

            except requests.exceptions.RequestException as e:
                log.error('Error while downloading stream for thumbnail %s. Error: %s', url, e)
                return url
            except (OSError, ValueError) as e:
                # ValueError: PIL could not determine the output format from the file name
                log.error('Error while writing to file %s for thumbnail %s. Error: %s', abs_path, url, e)
                return url
            finally:
                # Delete temp file (also after a failure, so no partial downloads are left behind)
                try:
                    os.unlink(abs_path_tmp)
                except FileNotFoundError:
                    pass
                except OSError as e:
                    log.warning('Could not delete temporary file %s. Error: %s', abs_path_tmp, e)
        finally:
            response.close()

        # Return
        media_url = urljoin(srv_settings.MEDIA_URL, f"thumbs/{object_type}/{file_name}")
        return media_url