ytsm/app/YtManagerApp/management/download_manager.py

256 lines
9.8 KiB
Python
Raw Permalink Normal View History

2020-04-10 21:30:24 +00:00
import logging
import mimetypes
import os
import re
from string import Template
from threading import Lock
from urllib.parse import urljoin
import PIL.Image
import PIL.ImageOps
import requests
import youtube_dl
from django.conf import settings as srv_settings
from YtManagerApp.models import Subscription, Video
from YtManagerApp.models import VIDEO_ORDER_MAPPING
from YtManagerApp.providers.video_provider import VideoProvider
from YtManagerApp.scheduler.job import Job
from YtManagerApp.utils import first_non_null
log = logging.getLogger('DownloadManager')
class DownloadVideoJob(Job):
"""
Downloads a video to the disk
"""
name = "DownloadVideoJob"
__lock = Lock()
def __init__(self, job_execution, video: Video, attempt: int = 1):
super().__init__(job_execution)
self._video = video
self._attempt = attempt
self._log_youtube_dl = self.log.getChild('youtube_dl')
def get_description(self):
ret = "Downloading video " + self._video.name
if self._attempt > 1:
ret += f" (attempt {self._attempt})"
return ret
def run(self):
from YtManagerApp.services import Services
# Issue: if multiple videos are downloaded at the same time, a race condition appears in the mkdirs() call that
# youtube-dl makes, which causes it to fail with the error 'Cannot create folder - file already exists'.
# For now, allow a single download instance.
self.__lock.acquire()
try:
user = self._video.subscription.user
provider: VideoProvider = Services.videoProviderManager().get(self._video)
max_attempts = user.preferences['max_download_attempts']
youtube_dl_params, output_path = self.__build_youtube_dl_params(self._video)
with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
ret = yt.download([provider.get_video_url(self._video)])
self.log.info('Download finished with code %d', ret)
if ret == 0:
self._video.downloaded_path = output_path
self._video.save()
self.log.info('Video %d [%s %s] downloaded successfully!', self._video.id, self._video.video_id,
self._video.name)
# update size
self._video.downloaded_size = 0
for file in self._video.get_files():
self._video.downloaded_size += os.stat(file).st_size
self._video.save()
elif self._attempt <= max_attempts:
self.log.warning('Re-enqueueing video (attempt %d/%d)', self._attempt, max_attempts)
Services.videoManager().download(self._video, self._attempt + 1)
else:
self.log.error('Multiple attempts to download video %d [%s %s] failed!', self._video.id,
self._video.video_id, self._video.name)
self._video.downloaded_path = ''
self._video.save()
finally:
self.__lock.release()
def __build_youtube_dl_params(self, video: Video):
sub = video.subscription
user = sub.user
# resolve path
download_path = user.preferences['download_path']
template_dict = self.__build_template_dict(video)
output_pattern = Template(user.preferences['download_file_pattern']).safe_substitute(template_dict)
output_path = os.path.join(download_path, output_pattern)
output_path = os.path.normpath(output_path)
youtube_dl_params = {
'logger': self._log_youtube_dl,
'format': user.preferences['download_format'],
'outtmpl': output_path,
'writethumbnail': True,
'writedescription': True,
'writesubtitles': user.preferences['download_subtitles'],
'writeautomaticsub': user.preferences['download_autogenerated_subtitles'],
'allsubtitles': user.preferences['download_subtitles_all'],
'merge_output_format': 'mp4',
'postprocessors': [
{
'key': 'FFmpegMetadata'
},
]
}
sub_langs = user.preferences['download_subtitles_langs'].split(',')
sub_langs = [i.strip() for i in sub_langs]
if len(sub_langs) > 0:
youtube_dl_params['subtitleslangs'] = sub_langs
sub_format = user.preferences['download_subtitles_format']
if len(sub_format) > 0:
youtube_dl_params['subtitlesformat'] = sub_format
return youtube_dl_params, output_path
def __build_template_dict(self, video: Video):
return {
'channel': video.subscription.channel_name,
'channel_id': video.subscription.channel_id,
'playlist': video.subscription.name,
'playlist_id': video.subscription.playlist_id,
'playlist_index': "{:03d}".format(1 + video.playlist_index),
'title': video.name,
'id': video.video_id,
}
def __get_valid_path(self, path: str):
"""
Normalizes string, converts to lowercase, removes non-alpha characters, removes forbidden characters.
"""
import unicodedata
value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
value = re.sub('[:"*]', '', value).strip()
value = re.sub('[?<>|]', '#', value)
return value
class DownloadManager(object):
def __init__(self):
pass
def download_video(self, video: Video, attempt: int = 1):
from YtManagerApp.services import Services
Services.scheduler().add_job(DownloadVideoJob, args=[video, attempt])
def __get_subscription_config(self, sub: Subscription):
user = sub.user
enabled = first_non_null(sub.auto_download, user.preferences['auto_download'])
global_limit = user.preferences['download_global_limit']
limit = first_non_null(sub.download_limit, user.preferences['download_subscription_limit'])
order = first_non_null(sub.download_order, user.preferences['download_order'])
order = VIDEO_ORDER_MAPPING[order]
return enabled, global_limit, limit, order
def process_subscription(self, sub: Subscription):
from YtManagerApp.services import Services
log.info('Processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.id)
enabled, global_limit, limit, order = self.__get_subscription_config(sub)
log.info('Determined settings enabled=%s global_limit=%d limit=%d order="%s"', enabled, global_limit, limit, order)
if enabled:
videos_to_download = Video.objects\
.filter(subscription=sub, downloaded_path__isnull=True, watched=False)\
.order_by(order)
log.info('%d download candidates.', len(videos_to_download))
if global_limit > 0:
global_downloaded = Video.objects.filter(subscription__user=sub.user, downloaded_path__isnull=False).count()
allowed_count = max(global_limit - global_downloaded, 0)
videos_to_download = videos_to_download[0:allowed_count]
log.info('Global limit is set, can only download up to %d videos.', allowed_count)
if limit > 0:
sub_downloaded = Video.objects.filter(subscription=sub, downloaded_path__isnull=False).count()
allowed_count = max(limit - sub_downloaded, 0)
videos_to_download = videos_to_download[0:allowed_count]
log.info('Limit is set, can only download up to %d videos.', allowed_count)
# enqueue download
for video in videos_to_download:
log.info('Enqueuing video %d [%s %s] index=%d', video.id, video.video_id, video.name, video.playlist_index)
Services.videoManager().download(video)
log.info('Finished processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.id)
def process_all_subscriptions(self):
for subscription in Subscription.objects.all():
self.process_subscription(subscription)
def fetch_thumbnail(self, url, object_type, identifier, thumb_size):
log.info('Fetching thumbnail url=%s object_type=%s identifier=%s', url, object_type, identifier)
# Make request to obtain mime type
try:
response = requests.get(url, stream=True)
except requests.exceptions.RequestException as e:
log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
return url
ext = mimetypes.guess_extension(response.headers['Content-Type'])
# Build file path
file_name = f"{identifier}{ext}"
abs_path_dir = os.path.join(srv_settings.MEDIA_ROOT, "thumbs", object_type)
abs_path = os.path.join(abs_path_dir, file_name)
abs_path_tmp = file_name + '.tmp'
# Store image
try:
os.makedirs(abs_path_dir, exist_ok=True)
with open(abs_path_tmp, "wb") as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
# Resize and crop to thumbnail size
image = PIL.Image.open(abs_path_tmp)
image = PIL.ImageOps.fit(image, thumb_size)
image.save(abs_path)
image.close()
# Delete temp file
os.unlink(abs_path_tmp)
except requests.exceptions.RequestException as e:
log.error('Error while downloading stream for thumbnail %s. Error: %s', url, e)
return url
except OSError as e:
log.error('Error while writing to file %s for thumbnail %s. Error: %s', abs_path, url, e)
return url
# Return
media_url = urljoin(srv_settings.MEDIA_URL, f"thumbs/{object_type}/{file_name}")
return media_url