# ytsm/app/YtManagerApp/management/download_manager.py

import logging
import mimetypes
import os
import re
from string import Template
from threading import Lock
from urllib.parse import urljoin

import PIL.Image
import PIL.ImageOps
import requests
import youtube_dl
from django.conf import settings as srv_settings

from YtManagerApp.models import Subscription, Video
from YtManagerApp.models import VIDEO_ORDER_MAPPING
from YtManagerApp.providers.video_provider import VideoProvider
from YtManagerApp.scheduler.job import Job
from YtManagerApp.utils import first_non_null

# Module-level logger used by DownloadManager below; DownloadVideoJob logs through its own self.log.
log = logging.getLogger('DownloadManager')


class DownloadVideoJob(Job):
    """
    Downloads a video to the disk using youtube-dl.

    The output location and the youtube-dl options are taken from the preferences of the user
    owning the video's subscription. A failed download is re-enqueued until the user's
    'max_download_attempts' preference is reached.
    """
    name = "DownloadVideoJob"
    # Shared by every instance of the job: serializes downloads (see run()).
    __lock = Lock()

    def __init__(self, job_execution, video: Video, attempt: int = 1):
        """
        :param job_execution: passed through unchanged to the Job base class
        :param video: the video to download
        :param attempt: 1-based number of this download attempt
        """
        super().__init__(job_execution)
        self._video = video
        self._attempt = attempt
        # Dedicated child logger handed to youtube-dl, so its output can be told apart from ours.
        self._log_youtube_dl = self.log.getChild('youtube_dl')

    def get_description(self):
        """
        Returns a human readable description of the job; mentions the attempt number on retries.
        """
        ret = "Downloading video " + self._video.name
        if self._attempt > 1:
            ret += f" (attempt {self._attempt})"
        return ret

    def run(self):
        """
        Downloads the video and records the outcome on the Video object.

        - youtube-dl returns 0: 'downloaded_path' and 'downloaded_size' are stored.
        - youtube-dl returns non-zero and attempts remain: the video is enqueued again with
          attempt + 1.
        - youtube-dl returns non-zero and no attempts remain: 'downloaded_path' is set to ''.

        NOTE(review): depending on the youtube-dl options, a failed download may raise instead of
        returning a non-zero code; that case is not handled here - confirm how Job deals with
        exceptions.
        """
        from YtManagerApp.services import Services

        # Issue: if multiple videos are downloaded at the same time, a race condition appears in the mkdirs() call that
        # youtube-dl makes, which causes it to fail with the error 'Cannot create folder - file already exists'.
        # For now, allow a single download instance.
        with self.__lock:
            user = self._video.subscription.user
            provider: VideoProvider = Services.videoProviderManager().get(self._video)

            max_attempts = user.preferences['max_download_attempts']

            youtube_dl_params, output_path = self.__build_youtube_dl_params(self._video)
            with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
                ret = yt.download([provider.get_video_url(self._video)])

            self.log.info('Download finished with code %d', ret)

            if ret == 0:
                # The path is saved first, then the size is computed from the files on disk.
                self._video.downloaded_path = output_path
                self._video.save()
                self.log.info('Video %d [%s %s] downloaded successfully!', self._video.id, self._video.video_id,
                              self._video.name)

                # update size
                self._video.downloaded_size = sum(os.stat(file).st_size for file in self._video.get_files())
                self._video.save()

            elif self._attempt < max_attempts:
                # Strictly less than: attempts are 1-based, so attempt number 'max_attempts' is the
                # last one allowed. ('<=' would perform max_attempts + 1 downloads.)
                self.log.warning('Re-enqueueing video (attempt %d/%d)', self._attempt, max_attempts)
                Services.videoManager().download(self._video, self._attempt + 1)

            else:
                self.log.error('Multiple attempts to download video %d [%s %s] failed!', self._video.id,
                               self._video.video_id, self._video.name)
                self._video.downloaded_path = ''
                self._video.save()

    def __build_youtube_dl_params(self, video: Video):
        """
        Builds the youtube-dl options from the preferences of the user owning the video.

        :param video: the video which will be downloaded
        :return: tuple (youtube-dl parameter dictionary, normalized output path template)
        """
        user = video.subscription.user

        # resolve path; safe_substitute() leaves unknown $placeholders untouched instead of raising
        download_path = user.preferences['download_path']

        template_dict = self.__build_template_dict(video)
        output_pattern = Template(user.preferences['download_file_pattern']).safe_substitute(template_dict)

        output_path = os.path.normpath(os.path.join(download_path, output_pattern))

        youtube_dl_params = {
            'logger': self._log_youtube_dl,
            'format': user.preferences['download_format'],
            'outtmpl': output_path,
            'writethumbnail': True,
            'writedescription': True,
            'writesubtitles': user.preferences['download_subtitles'],
            'writeautomaticsub': user.preferences['download_autogenerated_subtitles'],
            'allsubtitles': user.preferences['download_subtitles_all'],
            'merge_output_format': 'mp4',
            'postprocessors': [
                {
                    'key': 'FFmpegMetadata'
                },
            ]
        }

        # ''.split(',') yields [''], so blank entries must be dropped explicitly; otherwise an
        # empty preference would request the subtitle language ''.
        sub_langs = [
            lang.strip()
            for lang in user.preferences['download_subtitles_langs'].split(',')
            if lang.strip()
        ]
        if sub_langs:
            youtube_dl_params['subtitleslangs'] = sub_langs

        sub_format = user.preferences['download_subtitles_format']
        if sub_format:
            youtube_dl_params['subtitlesformat'] = sub_format

        return youtube_dl_params, output_path

    def __build_template_dict(self, video: Video):
        """
        Builds the values available as $placeholders in the 'download_file_pattern' preference.

        'playlist_index' is converted from the stored index to a 1-based, zero padded (3 digits) string.
        """
        return {
            'channel': video.subscription.channel_name,
            'channel_id': video.subscription.channel_id,
            'playlist': video.subscription.name,
            'playlist_id': video.subscription.playlist_id,
            'playlist_index': "{:03d}".format(1 + video.playlist_index),
            'title': video.name,
            'id': video.video_id,
        }

    def __get_valid_path(self, path: str):
        """
        Makes a string safer to use as part of a file path: decomposes it (NFKD) and drops every
        non-ASCII character, removes the characters : " *, strips surrounding whitespace and
        replaces the characters ? < > | with '#'. The case of the string is not changed.

        NOTE(review): nothing in this class calls this helper, so the values placed in the file
        name template are currently not sanitized - confirm whether that is intended.
        """
        import unicodedata
        value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
        value = re.sub('[:"*]', '', value).strip()
        value = re.sub('[?<>|]', '#', value)
        return value


class DownloadManager(object):
    """
    Decides which videos of a subscription should be downloaded, enqueues the download jobs and
    fetches thumbnails into the media folder.
    """

    # Timeout (seconds) applied to the thumbnail HTTP request, so an unresponsive server cannot
    # block the caller forever.
    THUMBNAIL_REQUEST_TIMEOUT = 30

    def __init__(self):
        pass

    def download_video(self, video: Video, attempt: int = 1):
        """
        Schedules a DownloadVideoJob for the given video.

        :param video: the video to download
        :param attempt: 1-based attempt number passed on to the job
        """
        from YtManagerApp.services import Services
        Services.scheduler().add_job(DownloadVideoJob, args=[video, attempt])

    def __get_subscription_config(self, sub: Subscription):
        """
        Resolves the effective download settings of a subscription; a value set on the
        subscription takes precedence over the corresponding user preference.

        :return: tuple (enabled, global_limit, limit, order), where order is the value looked up
                 in VIDEO_ORDER_MAPPING
        """
        user = sub.user

        enabled = first_non_null(sub.auto_download, user.preferences['auto_download'])
        global_limit = user.preferences['download_global_limit']
        limit = first_non_null(sub.download_limit, user.preferences['download_subscription_limit'])
        order = first_non_null(sub.download_order, user.preferences['download_order'])
        order = VIDEO_ORDER_MAPPING[order]

        return enabled, global_limit, limit, order

    def process_subscription(self, sub: Subscription):
        """
        Enqueues downloads for the unwatched, not yet downloaded videos of a subscription,
        honoring the global (per user) and the per subscription download limits.
        A limit of 0 or less means the limit is not applied.

        NOTE(review): videos whose 'downloaded_path' is not NULL count towards both limits; a
        failed download stores '' there, so it presumably counts as well - confirm this is intended.
        """
        from YtManagerApp.services import Services

        log.info('Processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

        enabled, global_limit, limit, order = self.__get_subscription_config(sub)
        log.info('Determined settings enabled=%s global_limit=%d limit=%d order="%s"', enabled, global_limit, limit, order)

        if enabled:
            videos_to_download = Video.objects\
                .filter(subscription=sub, downloaded_path__isnull=True, watched=False)\
                .order_by(order)

            # count() asks the database for the number only, instead of loading every candidate
            # just to log how many there are.
            log.info('%d download candidates.', videos_to_download.count())

            if global_limit > 0:
                global_downloaded = Video.objects.filter(subscription__user=sub.user, downloaded_path__isnull=False).count()
                allowed_count = max(global_limit - global_downloaded, 0)
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Global limit is set, can only download up to %d videos.', allowed_count)

            if limit > 0:
                sub_downloaded = Video.objects.filter(subscription=sub, downloaded_path__isnull=False).count()
                allowed_count = max(limit - sub_downloaded, 0)
                # Slicing an already sliced result keeps the smaller of the two limits.
                videos_to_download = videos_to_download[0:allowed_count]
                log.info('Limit is set, can only download up to %d videos.', allowed_count)

            # enqueue download
            for video in videos_to_download:
                log.info('Enqueuing video %d [%s %s] index=%d', video.id, video.video_id, video.name, video.playlist_index)
                Services.videoManager().download(video)

        log.info('Finished processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)

    def process_all_subscriptions(self):
        """
        Runs process_subscription() for every subscription in the database.
        """
        for subscription in Subscription.objects.all():
            self.process_subscription(subscription)

    def fetch_thumbnail(self, url, object_type, identifier, thumb_size):
        """
        Downloads a thumbnail, crops/resizes it to thumb_size and stores it under
        MEDIA_ROOT/thumbs/<object_type>/<identifier><extension>.

        :param url: address of the original thumbnail
        :param object_type: name of the sub-folder of 'thumbs' in which the file is stored
        :param identifier: file name (without extension) of the stored thumbnail
        :param thumb_size: target size, passed to PIL.ImageOps.fit
        :return: the media URL of the stored thumbnail, or the original url if anything failed
        """
        log.info('Fetching thumbnail url=%s object_type=%s identifier=%s', url, object_type, identifier)

        # Make request to obtain mime type
        try:
            response = requests.get(url, stream=True, timeout=self.THUMBNAIL_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
            return url

        try:
            # The header may be missing or carry parameters ("image/jpeg; charset=..."), which
            # guess_extension() does not understand.
            content_type = response.headers.get('Content-Type', '')
            mime_type = content_type.split(';', 1)[0].strip()
            ext = mimetypes.guess_extension(mime_type) if mime_type else None
            if ext is None:
                log.error('Could not determine the file type of thumbnail %s (Content-Type: %r)', url, content_type)
                return url

            # Build file path; the temporary file lives next to the final one
            file_name = f"{identifier}{ext}"
            abs_path_dir = os.path.join(srv_settings.MEDIA_ROOT, "thumbs", object_type)
            abs_path = os.path.join(abs_path_dir, file_name)
            abs_path_tmp = abs_path + '.tmp'

            # Store image
            try:
                os.makedirs(abs_path_dir, exist_ok=True)
                with open(abs_path_tmp, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)

                # Resize and crop to thumbnail size. The source image is closed by the 'with'
                # block, so the temporary file can be removed afterwards on every platform.
                with PIL.Image.open(abs_path_tmp) as image:
                    thumbnail = PIL.ImageOps.fit(image, thumb_size)
                    thumbnail.save(abs_path)

            # RequestException derives from OSError, so it has to be handled first.
            except requests.exceptions.RequestException as e:
                log.error('Error while downloading stream for thumbnail %s. Error: %s', url, e)
                return url
            except (OSError, ValueError) as e:
                # OSError: file system errors and data PIL cannot read or write;
                # ValueError: PIL does not know how to save files with this extension.
                log.error('Error while writing to file %s for thumbnail %s. Error: %s', abs_path, url, e)
                return url
            finally:
                # Delete temp file, also when something went wrong
                self._remove_temp_file(abs_path_tmp)

        finally:
            # With stream=True the connection is held until the response is closed.
            response.close()

        # Return
        media_url = urljoin(srv_settings.MEDIA_URL, f"thumbs/{object_type}/{file_name}")
        return media_url

    @staticmethod
    def _remove_temp_file(path):
        """
        Removes a temporary file; a file which does not exist is ignored, other errors are logged.
        """
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Nothing was written before the failure - nothing to clean up.
            pass
        except OSError as e:
            log.warning('Could not remove temporary file %s. Error: %s', path, e)