# ytsm/app/YtManagerApp/management/download_manager.py
#
# 256 lines
# 9.8 KiB
# Python

import logging
import mimetypes
import os
import re
from string import Template
from threading import Lock
from urllib.parse import urljoin
import PIL.Image
import PIL.ImageOps
import requests
import youtube_dl
from django.conf import settings as srv_settings
from YtManagerApp.models import Subscription, Video
from YtManagerApp.models import VIDEO_ORDER_MAPPING
from YtManagerApp.providers.video_provider import VideoProvider
from YtManagerApp.scheduler.job import Job
from YtManagerApp.utils import first_non_null
# Module-level logger used by the DownloadManager methods in this file.
log = logging.getLogger('DownloadManager')
class DownloadVideoJob(Job):
    """
    Downloads a video to the disk.

    The job resolves the output path from the owning user's preferences,
    runs youtube-dl, and on success records the output path and the total
    size of the downloaded files on the video. On failure the download is
    re-enqueued until the user's 'max_download_attempts' is reached.
    """
    name = "DownloadVideoJob"

    # Class-level lock shared by every instance of this job; see run().
    __lock = Lock()

    def __init__(self, job_execution, video: Video, attempt: int = 1):
        """
        :param job_execution: passed through unchanged to the Job base class
        :param video: the video to download
        :param attempt: 1-based number of this download attempt
        """
        super().__init__(job_execution)
        self._video = video
        self._attempt = attempt
        self._log_youtube_dl = self.log.getChild('youtube_dl')

    def get_description(self):
        """
        Builds a human readable description of the job; the attempt number
        is appended for every attempt after the first one.
        """
        description = "Downloading video " + self._video.name
        if self._attempt > 1:
            description += f" (attempt {self._attempt})"
        return description

    def run(self):
        """
        Downloads the video with youtube-dl and stores the outcome.

        A non-zero youtube-dl return code re-enqueues the download while
        attempts remain; once they are exhausted the downloaded path is
        reset to an empty string.
        """
        from YtManagerApp.services import Services

        # Issue: if multiple videos are downloaded at the same time, a race condition appears in the mkdirs() call that
        # youtube-dl makes, which causes it to fail with the error 'Cannot create folder - file already exists'.
        # For now, allow a single download instance.
        with self.__lock:
            video = self._video
            user = video.subscription.user
            provider: VideoProvider = Services.videoProviderManager().get(video)
            max_attempts = user.preferences['max_download_attempts']

            youtube_dl_params, output_path = self.__build_youtube_dl_params(video)
            with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
                ret = yt.download([provider.get_video_url(video)])

            self.log.info('Download finished with code %d', ret)

            if ret == 0:
                video.downloaded_path = output_path
                video.save()
                self.log.info('Video %d [%s %s] downloaded successfully!', video.id, video.video_id,
                              video.name)

                # update size
                video.downloaded_size = sum(os.stat(file).st_size for file in video.get_files())
                video.save()

            elif self._attempt < max_attempts:
                # Attempts are 1-based, so a strict comparison is needed to
                # perform at most max_attempts downloads in total.
                self.log.warning('Re-enqueueing video (attempt %d/%d)', self._attempt, max_attempts)
                Services.videoManager().download(video, self._attempt + 1)

            else:
                self.log.error('Multiple attempts to download video %d [%s %s] failed!', video.id,
                               video.video_id, video.name)
                video.downloaded_path = ''
                video.save()

    def __build_youtube_dl_params(self, video: Video):
        """
        Builds the youtube-dl options for the given video from the
        preferences of the user owning its subscription.

        :return: tuple (youtube-dl options dictionary, output path)
        """
        sub = video.subscription
        user = sub.user

        # resolve path
        download_path = user.preferences['download_path']
        template_dict = self.__build_template_dict(video)
        # safe_substitute leaves unknown placeholders untouched instead of raising
        output_pattern = Template(user.preferences['download_file_pattern']).safe_substitute(template_dict)
        output_path = os.path.normpath(os.path.join(download_path, output_pattern))

        youtube_dl_params = {
            'logger': self._log_youtube_dl,
            'format': user.preferences['download_format'],
            'outtmpl': output_path,
            'writethumbnail': True,
            'writedescription': True,
            'writesubtitles': user.preferences['download_subtitles'],
            'writeautomaticsub': user.preferences['download_autogenerated_subtitles'],
            'allsubtitles': user.preferences['download_subtitles_all'],
            'merge_output_format': 'mp4',
            'postprocessors': [
                {
                    'key': 'FFmpegMetadata'
                },
            ]
        }

        # Splitting an empty preference yields [''], so blank entries are
        # dropped before deciding whether any language was configured.
        sub_langs = [lang.strip() for lang in user.preferences['download_subtitles_langs'].split(',')]
        sub_langs = [lang for lang in sub_langs if lang]
        if sub_langs:
            youtube_dl_params['subtitleslangs'] = sub_langs

        sub_format = user.preferences['download_subtitles_format']
        if sub_format:
            youtube_dl_params['subtitlesformat'] = sub_format

        return youtube_dl_params, output_path

    def __build_template_dict(self, video: Video):
        """
        Builds the placeholder values available to the user's
        'download_file_pattern' template.
        """
        return {
            'channel': video.subscription.channel_name,
            'channel_id': video.subscription.channel_id,
            'playlist': video.subscription.name,
            'playlist_id': video.subscription.playlist_id,
            # 1-based index, zero padded to 3 digits
            'playlist_index': "{:03d}".format(1 + video.playlist_index),
            'title': video.name,
            'id': video.video_id,
        }

    def __get_valid_path(self, path: str):
        """
        Reduces the string to ASCII (NFKD normalization, characters without
        an ASCII representation are dropped), removes the characters : " *,
        strips surrounding whitespace and replaces ? < > | with #.

        Not called from the code visible in this class.
        """
        import unicodedata
        value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[:"*]', '', value).strip()
        value = re.sub(r'[?<>|]', '#', value)
        return value
class DownloadManager(object):
    """
    Schedules video downloads for subscriptions and fetches thumbnails
    into the media directory.
    """

    # Seconds to wait for the thumbnail server before giving up; without a
    # timeout a stalled server would block the caller indefinitely.
    _THUMBNAIL_REQUEST_TIMEOUT = 30

    def __init__(self):
        pass

    def download_video(self, video: Video, attempt: int = 1):
        """
        Adds a DownloadVideoJob for the given video to the scheduler.

        :param video: the video to download
        :param attempt: 1-based attempt number handed to the job
        """
        from YtManagerApp.services import Services
        Services.scheduler().add_job(DownloadVideoJob, args=[video, attempt])

    def __get_subscription_config(self, sub: Subscription):
        """
        Resolves the effective download settings of a subscription; values
        set on the subscription take precedence over the user's preferences.

        :return: tuple (enabled, global_limit, limit, order)
        """
        user = sub.user

        enabled = first_non_null(sub.auto_download, user.preferences['auto_download'])
        global_limit = user.preferences['download_global_limit']
        limit = first_non_null(sub.download_limit, user.preferences['download_subscription_limit'])
        order = first_non_null(sub.download_order, user.preferences['download_order'])
        order = VIDEO_ORDER_MAPPING[order]

        return enabled, global_limit, limit, order

    def process_subscription(self, sub: Subscription):
        """
        Enqueues downloads for the not yet downloaded, unwatched videos of
        a subscription, honoring the global and per-subscription limits
        (a limit is only applied when it is greater than zero).
        """
        from YtManagerApp.services import Services

        log.info('Processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

        enabled, global_limit, limit, order = self.__get_subscription_config(sub)
        log.info('Determined settings enabled=%s global_limit=%d limit=%d order="%s"', enabled, global_limit, limit, order)

        if enabled:
            videos_to_download = Video.objects\
                .filter(subscription=sub, downloaded_path__isnull=True, watched=False)\
                .order_by(order)

            log.info('%d download candidates.', len(videos_to_download))

            if global_limit > 0:
                global_downloaded = Video.objects.filter(subscription__user=sub.user, downloaded_path__isnull=False).count()
                allowed_count = max(global_limit - global_downloaded, 0)
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Global limit is set, can only download up to %d videos.', allowed_count)

            if limit > 0:
                sub_downloaded = Video.objects.filter(subscription=sub, downloaded_path__isnull=False).count()
                allowed_count = max(limit - sub_downloaded, 0)
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Limit is set, can only download up to %d videos.', allowed_count)

            # enqueue download
            for video in videos_to_download:
                log.info('Enqueuing video %d [%s %s] index=%d', video.id, video.video_id, video.name, video.playlist_index)
                Services.videoManager().download(video)

        log.info('Finished processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

    def process_all_subscriptions(self):
        """
        Runs process_subscription for every subscription.
        """
        for subscription in Subscription.objects.all():
            self.process_subscription(subscription)

    def fetch_thumbnail(self, url, object_type, identifier, thumb_size):
        """
        Downloads a thumbnail, resizes and crops it to thumb_size and stores
        it under MEDIA_ROOT/thumbs/<object_type>/.

        :param url: address of the remote thumbnail
        :param object_type: name of the sub-directory of 'thumbs' to store into
        :param identifier: file name (without extension) of the stored image
        :param thumb_size: size passed to PIL.ImageOps.fit
        :return: the media url of the stored thumbnail, or the original url
                 when the thumbnail could not be fetched or stored
        """
        log.info('Fetching thumbnail url=%s object_type=%s identifier=%s', url, object_type, identifier)

        # Make request to obtain mime type
        try:
            response = requests.get(url, stream=True, timeout=self._THUMBNAIL_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
            return url

        try:
            # The header may be absent or carry parameters ("image/png; charset=...");
            # only the bare mime type can be mapped to an extension.
            content_type = response.headers.get('Content-Type', '')
            mime_type = content_type.split(';', 1)[0].strip()
            ext = mimetypes.guess_extension(mime_type) if mime_type else None
            if ext is None:
                log.error('Cannot determine file type of thumbnail %s (Content-Type: %r).', url, content_type)
                return url

            # Build file path
            file_name = f"{identifier}{ext}"
            abs_path_dir = os.path.join(srv_settings.MEDIA_ROOT, "thumbs", object_type)
            abs_path = os.path.join(abs_path_dir, file_name)
            # Keep the temporary file next to the final one instead of in the
            # current working directory of the process.
            abs_path_tmp = abs_path + '.tmp'

            # Store image
            try:
                os.makedirs(abs_path_dir, exist_ok=True)
                self.__store_thumbnail(response, abs_path_tmp, abs_path, thumb_size)
            except requests.exceptions.RequestException as e:
                log.error('Error while downloading stream for thumbnail %s. Error: %s', url, e)
                return url
            except OSError as e:
                log.error('Error while writing to file %s for thumbnail %s. Error: %s', abs_path, url, e)
                return url
        finally:
            # The request was made with stream=True, so release the connection explicitly.
            response.close()

        # Return
        return urljoin(srv_settings.MEDIA_URL, f"thumbs/{object_type}/{file_name}")

    def __store_thumbnail(self, response, abs_path_tmp, abs_path, thumb_size):
        """
        Writes the response body to abs_path_tmp, saves a copy resized and
        cropped to thumb_size at abs_path, and always removes the temporary file.
        """
        try:
            with open(abs_path_tmp, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            # Resize and crop to thumbnail size
            with PIL.Image.open(abs_path_tmp) as source, PIL.ImageOps.fit(source, thumb_size) as thumbnail:
                thumbnail.save(abs_path)
        finally:
            # Delete temp file, also when the download or the conversion failed
            if os.path.exists(abs_path_tmp):
                os.unlink(abs_path_tmp)