Added docker support

2024-02-24 05:43:31 +00:00 · 2018-10-30 14:15:49 +08:00
parent 97e7e792f8
commit 84b0c2e861
108 changed files with 240 additions and 16 deletions
--- a/app/YtManagerApp/management/init.py
+++ b/app/YtManagerApp/management/init.py
--- a/app/YtManagerApp/management/downloader.py
+++ b/app/YtManagerApp/management/downloader.py
@@ -0,0 +1,103 @@
+from YtManagerApp.appconfig import settings
+from YtManagerApp.management.jobs.download_video import schedule_download_video
+from YtManagerApp.models import Video, Subscription, VIDEO_ORDER_MAPPING
+from django.conf import settings as srv_settings
+import logging
+import requests
+import mimetypes
+import os
+from urllib.parse import urljoin
+
+log = logging.getLogger('downloader')
+
+
+def __get_subscription_config(sub: Subscription):
+    """
+    Reads the automatic download settings that apply to a subscription.
+    :param sub: subscription whose settings are resolved
+    :return: tuple (enabled, global_limit, limit, order); a limit is -1 when its option is empty
+    """
+    def read_limit(option):
+        # An empty option value is reported as -1.
+        raw_value = settings.get_sub(sub, 'user', option)
+        if len(raw_value) > 0:
+            return settings.getint_sub(sub, 'user', option)
+        return -1
+
+    auto_download = settings.getboolean_sub(sub, 'user', 'AutoDownload')
+    global_limit = read_limit('DownloadGlobalLimit')
+    sub_limit = read_limit('DownloadSubscriptionLimit')
+
+    # Translate the configured order name through VIDEO_ORDER_MAPPING
+    order_by = VIDEO_ORDER_MAPPING[settings.get_sub(sub, 'user', 'DownloadOrder')]
+
+    return auto_download, global_limit, sub_limit, order_by
+
+
+def downloader_process_subscription(sub: Subscription):
+    """
+    Enqueues download jobs for the videos of a subscription which are neither downloaded nor watched,
+    taking the configured global and per-subscription limits into account.
+    :param sub: subscription to process
+    :return:
+    """
+    log.info('Processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)
+
+    enabled, global_limit, limit, order = __get_subscription_config(sub)
+    log.info('Determined settings enabled=%s global_limit=%d limit=%d order="%s"', enabled, global_limit, limit, order)
+
+    if enabled:
+        videos_to_download = Video.objects\
+            .filter(subscription=sub, downloaded_path__isnull=True, watched=False)\
+            .order_by(order)
+
+        log.info('%d download candidates.', len(videos_to_download))
+
+        # A limit is only applied when it is greater than zero.
+        if global_limit > 0:
+            # Count every video with a download path across all subscriptions of this user
+            global_downloaded = Video.objects.filter(subscription__user=sub.user, downloaded_path__isnull=False).count()
+            allowed_count = max(global_limit - global_downloaded, 0)
+            videos_to_download = videos_to_download[0:allowed_count]
+            log.info('Global limit is set, can only download up to %d videos.', allowed_count)
+
+        if limit > 0:
+            # Slicing a second time keeps the smaller of the two allowances
+            sub_downloaded = Video.objects.filter(subscription=sub, downloaded_path__isnull=False).count()
+            allowed_count = max(limit - sub_downloaded, 0)
+            videos_to_download = videos_to_download[0:allowed_count]
+            log.info('Limit is set, can only download up to %d videos.', allowed_count)
+
+        # enqueue download
+        for video in videos_to_download:
+            log.info('Enqueuing video %d [%s %s] index=%d', video.id, video.video_id, video.name, video.playlist_index)
+            schedule_download_video(video)
+
+    log.info('Finished processing subscription %d [%s %s]', sub.id, sub.playlist_id, sub.name)
+
+
+def downloader_process_all():
+    """
+    Runs downloader_process_subscription for every subscription in the database.
+    :return:
+    """
+    all_subscriptions = Subscription.objects.all()
+    for sub in all_subscriptions:
+        downloader_process_subscription(sub)
+
+
+def fetch_thumbnail(url, object_type, identifier, quality):
+    """
+    Downloads a thumbnail into MEDIA_ROOT/thumbs/<object_type>/ and returns the media URL of the stored file.
+    :param url: address the thumbnail is downloaded from
+    :param object_type: name of the sub-directory under "thumbs" the file is stored in
+    :param identifier: first part of the stored file name
+    :param quality: second part of the stored file name
+    :return: media URL of the stored file, or the original url if fetching or storing failed
+    """
+
+    log.info('Fetching thumbnail url=%s object_type=%s identifier=%s quality=%s', url, object_type, identifier, quality)
+
+    # Make request to obtain mime type; the timeout keeps a stalled server from blocking the caller forever
+    try:
+        response = requests.get(url, stream=True, timeout=30)
+    except requests.exceptions.RequestException as e:
+        log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
+        return url
+
+    try:
+        # Do not store the body of an error page as if it were an image
+        try:
+            response.raise_for_status()
+        except requests.exceptions.RequestException as e:
+            log.error('Failed to fetch thumbnail %s. Error: %s', url, e)
+            return url
+
+        # The header may be missing or carry parameters (e.g. "; charset=..."), which guess_extension rejects
+        content_type = response.headers.get('Content-Type', '').split(';')[0].strip()
+        ext = mimetypes.guess_extension(content_type) or ''
+
+        # Build file path
+        file_name = f"{identifier}-{quality}{ext}"
+        abs_path_dir = os.path.join(srv_settings.MEDIA_ROOT, "thumbs", object_type)
+        abs_path = os.path.join(abs_path_dir, file_name)
+
+        # Store image
+        try:
+            os.makedirs(abs_path_dir, exist_ok=True)
+            with open(abs_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+        except requests.exceptions.RequestException as e:
+            log.error('Error while downloading stream for thumbnail %s. Error: %s', url, e)
+            return url
+        except OSError as e:
+            log.error('Error while writing to file %s for thumbnail %s. Error: %s', abs_path, url, e)
+            return url
+    finally:
+        # A streamed response keeps its connection open until it is closed explicitly
+        response.close()
+
+    # Return
+    media_url = urljoin(srv_settings.MEDIA_URL, f"thumbs/{object_type}/{file_name}")
+    return media_url
--- a/app/YtManagerApp/management/jobs/init.py
+++ b/app/YtManagerApp/management/jobs/init.py
--- a/app/YtManagerApp/management/jobs/delete_video.py
+++ b/app/YtManagerApp/management/jobs/delete_video.py
@@ -0,0 +1,39 @@
+import logging
+import os
+
+from YtManagerApp import scheduler
+from YtManagerApp.models import Video
+
+log = logging.getLogger('video_downloader')
+
+
+def delete_video(video: Video):
+    """
+    Deletes the files returned by video.get_files() and clears the download path of the video.
+    Failures to delete individual files are logged and do not stop the remaining deletions.
+    :param video: video whose files are removed
+    :return:
+    """
+    log.info('Deleting video %d [%s %s]', video.id, video.video_id, video.name)
+    count = 0
+
+    try:
+        for file in video.get_files():
+            log.info("Deleting file %s", file)
+            try:
+                os.unlink(file)
+            except OSError as e:
+                log.error("Failed to delete file %s: Error: %s", file, e)
+            else:
+                # Only files which were actually removed are counted
+                count += 1
+
+    except OSError as e:
+        log.error("Failed to delete video %d [%s %s]. Error: %s", video.id, video.video_id, video.name, e)
+
+    # The video is marked as not downloaded even if some files could not be removed
+    video.downloaded_path = None
+    video.save()
+
+    log.info('Deleted video %d successfully! (%d files) [%s %s]', video.id, count, video.video_id, video.name)
+
+
+def schedule_delete_video(video: Video):
+    """
+    Schedules a delete video job to run immediately.
+    :param video: video passed to delete_video when the job runs
+    :return:
+    """
+    job = scheduler.scheduler.add_job(delete_video, args=[video])
+    log.info('Scheduled delete video job video=(%s), job=%s', video, job.id)
--- a/app/YtManagerApp/management/jobs/download_video.py
+++ b/app/YtManagerApp/management/jobs/download_video.py
@@ -0,0 +1,110 @@
+from YtManagerApp.models import Video
+from YtManagerApp import scheduler
+from YtManagerApp.appconfig import settings
+import os
+import youtube_dl
+import logging
+import re
+
+log = logging.getLogger('video_downloader')
+log_youtube_dl = log.getChild('youtube_dl')
+
+
+def __get_valid_path(path):
+    """
+    Cleans up a path string: decomposes Unicode characters (NFKD) and drops everything that
+    cannot be encoded as ASCII, removes the characters : " and *, trims surrounding whitespace,
+    and replaces the characters ? < > | with #.
+    :param path: string to clean up
+    :return: the cleaned up string
+    """
+    import unicodedata
+    value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
+    value = re.sub('[:"*]', '', value).strip()
+    value = re.sub('[?<>|]', '#', value)
+    return value
+
+
+def __build_youtube_dl_params(video: Video):
+    """
+    Builds the options dictionary passed to youtube_dl.YoutubeDL for a video.
+    :param video: video to build the parameters for; settings are resolved through its subscription
+    :return: tuple (youtube_dl_params, output_path)
+    """
+    # resolve path
+    pattern_dict = {
+        'channel': video.subscription.channel_name,
+        'channel_id': video.subscription.channel_id,
+        'playlist': video.subscription.name,
+        'playlist_id': video.subscription.playlist_id,
+        'playlist_index': "{:03d}".format(1 + video.playlist_index),
+        'title': video.name,
+        'id': video.video_id,
+    }
+
+    download_path = settings.get_sub(video.subscription, 'user', 'DownloadPath')
+    output_pattern = __get_valid_path(settings.get_sub(
+        video.subscription, 'user', 'DownloadFilePattern', vars=pattern_dict))
+
+    output_path = os.path.join(download_path, output_pattern)
+    output_path = os.path.normpath(output_path)
+
+    youtube_dl_params = {
+        'logger': log_youtube_dl,
+        'format': settings.get_sub(video.subscription, 'user', 'DownloadFormat'),
+        'outtmpl': output_path,
+        'writethumbnail': True,
+        'writedescription': True,
+        'writesubtitles': settings.getboolean_sub(video.subscription, 'user', 'DownloadSubtitles'),
+        'writeautomaticsub': settings.getboolean_sub(video.subscription, 'user', 'DownloadAutogeneratedSubtitles'),
+        'allsubtitles': settings.getboolean_sub(video.subscription, 'user', 'DownloadSubtitlesAll'),
+        'postprocessors': [
+            {
+                'key': 'FFmpegMetadata'
+            },
+        ]
+    }
+
+    # ''.split(',') yields [''], so blank entries are dropped before checking whether any language is set
+    sub_langs = settings.get_sub(video.subscription, 'user', 'DownloadSubtitlesLangs').split(',')
+    sub_langs = [lang.strip() for lang in sub_langs if lang.strip()]
+    if sub_langs:
+        youtube_dl_params['subtitleslangs'] = sub_langs
+
+    sub_format = settings.get_sub(video.subscription, 'user', 'DownloadSubtitlesFormat')
+    if len(sub_format) > 0:
+        youtube_dl_params['subtitlesformat'] = sub_format
+
+    return youtube_dl_params, output_path
+
+
+def download_video(video: Video, attempt: int = 1):
+    """
+    Downloads a video with youtube-dl. A failed download is re-enqueued until the number of
+    attempts configured in 'DownloadMaxAttempts' (fallback 3) has been made.
+    :param video: video to download
+    :param attempt: number of the current attempt, starting at 1
+    :return:
+    """
+
+    log.info('Downloading video %d [%s %s]', video.id, video.video_id, video.name)
+
+    max_attempts = settings.getint_sub(video.subscription, 'user', 'DownloadMaxAttempts', fallback=3)
+
+    youtube_dl_params, output_path = __build_youtube_dl_params(video)
+    try:
+        with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
+            ret = yt.download(["https://www.youtube.com/watch?v=" + video.video_id])
+    except youtube_dl.utils.DownloadError as e:
+        # youtube-dl raises instead of returning an error code unless 'ignoreerrors' is set;
+        # treat it like a non-zero return code so that the retry logic below still applies
+        log.error('Download of video %d [%s %s] raised an error: %s', video.id, video.video_id, video.name, e)
+        ret = 1
+
+    log.info('Download finished with code %d', ret)
+
+    if ret == 0:
+        video.downloaded_path = output_path
+        video.save()
+        log.info('Video %d [%s %s] downloaded successfully!', video.id, video.video_id, video.name)
+
+    elif attempt < max_attempts:
+        # Strictly less than: the current attempt already counts towards max_attempts
+        log.warning('Re-enqueueing video (attempt %d/%d)', attempt, max_attempts)
+        __schedule_download_video(video, attempt + 1)
+
+    else:
+        log.error('Multiple attempts to download video %d [%s %s] failed!', video.id, video.video_id, video.name)
+        # An empty (non-null) path is stored for videos whose download failed
+        video.downloaded_path = ''
+        video.save()
+
+
+def __schedule_download_video(video: Video, attempt=1):
+    """
+    Adds a download_video job for the given video and attempt number to the scheduler.
+    :param video: video passed to download_video
+    :param attempt: attempt number passed to download_video
+    :return:
+    """
+    job_args = [video, attempt]
+    scheduled_job = scheduler.scheduler.add_job(download_video, args=job_args)
+    log.info('Scheduled download video job video=(%s), attempt=%d, job=%s', video, attempt, scheduled_job.id)
+
+
+def schedule_download_video(video: Video):
+    """
+    Schedules the first download attempt for a video to run immediately.
+    :param video: video to download
+    :return:
+    """
+    __schedule_download_video(video, attempt=1)
--- a/app/YtManagerApp/management/jobs/synchronize.py
+++ b/app/YtManagerApp/management/jobs/synchronize.py
@@ -0,0 +1,162 @@
+import errno
+import mimetypes
+from threading import Lock
+
+from apscheduler.triggers.cron import CronTrigger
+
+from YtManagerApp import scheduler
+from YtManagerApp.appconfig import settings
+from YtManagerApp.management.downloader import fetch_thumbnail, downloader_process_all, downloader_process_subscription
+from YtManagerApp.models import *
+from YtManagerApp.utils import youtube
+
+log = logging.getLogger('sync')
+__lock = Lock()
+
+_ENABLE_UPDATE_STATS = True
+
+
+def __check_new_videos_sub(subscription: Subscription, yt_api: youtube.YoutubeAPI):
+    """
+    Creates Video entries for playlist items which are not yet stored for the subscription and,
+    when _ENABLE_UPDATE_STATS is set, refreshes rating and view count of all its videos.
+    :param subscription: subscription to check
+    :param yt_api: API object used to list the playlist items and fetch the statistics
+    :return:
+    """
+    # Get list of videos
+    for item in yt_api.playlist_items(subscription.playlist_id):
+        # exists() asks the database for presence only instead of loading the matching rows
+        already_known = Video.objects.filter(video_id=item.resource_video_id, subscription=subscription).exists()
+        if not already_known:
+            log.info('New video for subscription %s: %s %s', subscription, item.resource_video_id, item.title)
+            Video.create(item, subscription)
+
+    if _ENABLE_UPDATE_STATS:
+        all_vids = Video.objects.filter(subscription=subscription)
+        all_vids_ids = [video.video_id for video in all_vids]
+        all_vids_dict = {v.video_id: v for v in all_vids}
+
+        for yt_video in yt_api.videos(all_vids_ids, part='id,statistics'):
+            video = all_vids_dict.get(yt_video.id)
+            if video is None:
+                # The API returned an id which does not match any stored video; nothing to update
+                continue
+
+            # The rating is the share of likes among all votes; left untouched when there are no votes
+            if yt_video.n_likes is not None \
+                    and yt_video.n_dislikes is not None \
+                    and yt_video.n_likes + yt_video.n_dislikes > 0:
+                video.rating = yt_video.n_likes / (yt_video.n_likes + yt_video.n_dislikes)
+
+            video.views = yt_video.n_views
+            video.save()
+
+
+def __detect_deleted(subscription: Subscription):
+    """
+    Looks for downloaded videos of a subscription which no longer have a video file on disk.
+    Such videos get their leftover files removed and their download path cleared, and are
+    marked as watched if 'MarkDeletedAsWatched' is enabled.
+    Processing stops at the first video whose files cannot be listed for a reason other than ENOENT.
+    :param subscription: subscription to check
+    :return:
+    """
+    def is_video_file(path):
+        # A file counts as a video when its guessed mime type starts with "video"
+        mime_type, _ = mimetypes.guess_type(path)
+        return mime_type is not None and mime_type.startswith("video")
+
+    downloaded_videos = Video.objects.filter(subscription=subscription, downloaded_path__isnull=False)
+    for video in downloaded_videos:
+        try:
+            files = list(video.get_files())
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                log.error("Could not access path %s. Error: %s", video.downloaded_path, e)
+                return
+            # A missing path is handled like an empty file list
+            files = []
+
+        # Try to find a valid video file
+        if any(is_video_file(file) for file in files):
+            continue
+
+        # Video not found, we can safely assume that the video was deleted.
+        log.info("Video %d was deleted! [%s %s]", video.id, video.video_id, video.name)
+
+        # Clean up
+        for leftover in files:
+            try:
+                os.unlink(leftover)
+            except OSError as e:
+                log.error("Could not delete redundant file %s. Error: %s", leftover, e)
+        video.downloaded_path = None
+
+        # Mark watched?
+        mark_watched = settings.getboolean_sub(subscription, 'user', 'MarkDeletedAsWatched')
+        if mark_watched:
+            video.watched = True
+
+        video.save()
+
+
+def __fetch_thumbnails_obj(iterable, obj_type, id_attr):
+    """
+    Replaces the icon_default and icon_best values which start with "http" by the result of
+    fetch_thumbnail, then saves each object.
+    :param iterable: objects with icon_default and icon_best attributes
+    :param obj_type: object type passed to fetch_thumbnail
+    :param id_attr: name of the attribute whose value is passed to fetch_thumbnail as identifier
+    :return:
+    """
+    icon_fields = (('icon_default', 'default'), ('icon_best', 'best'))
+
+    for entry in iterable:
+        for field_name, quality in icon_fields:
+            current_value = getattr(entry, field_name)
+            if current_value.startswith("http"):
+                fetched = fetch_thumbnail(current_value, obj_type, getattr(entry, id_attr), quality)
+                setattr(entry, field_name, fetched)
+        entry.save()
+
+
+def __fetch_thumbnails():
+    """
+    Fetches the thumbnails of all subscriptions and videos whose icon_default or icon_best
+    value starts with 'http' (case-insensitive).
+    :return:
+    """
+    targets = (
+        ("Fetching subscription thumbnails... ", Subscription, 'sub', 'playlist_id'),
+        ("Fetching video thumbnails... ", Video, 'video', 'video_id'),
+    )
+
+    for message, model, obj_type, id_attr in targets:
+        log.info(message)
+        # Each query is built right before it is processed, default icons first
+        for icon_field in ('icon_default', 'icon_best'):
+            pending = model.objects.filter(**{icon_field + '__istartswith': 'http'})
+            __fetch_thumbnails_obj(pending, obj_type, id_attr)
+
+
+def synchronize():
+    """
+    Runs a full synchronization: checks every subscription for new and deleted videos,
+    enqueues downloads and fetches missing thumbnails.
+    Returns without doing anything if the synchronization lock is already held.
+    :return:
+    """
+    acquired = __lock.acquire(blocking=False)
+    if not acquired:
+        # Synchronize already running in another thread
+        log.info("Synchronize already running in another thread")
+        return
+
+    try:
+        log.info("Running scheduled synchronization... ")
+
+        # Sync subscribed playlists/channels
+        log.info("Sync - checking videos")
+        api = youtube.YoutubeAPI.build_public()
+        for sub in Subscription.objects.all():
+            __check_new_videos_sub(sub, api)
+            __detect_deleted(sub)
+
+        log.info("Sync - checking for videos to download")
+        downloader_process_all()
+
+        log.info("Sync - fetching missing thumbnails")
+        __fetch_thumbnails()
+
+        log.info("Synchronization finished.")
+
+    finally:
+        __lock.release()
+
+
+def synchronize_subscription(subscription: Subscription):
+    """
+    Synchronizes a single subscription: checks it for new and deleted videos, enqueues its
+    downloads and fetches missing thumbnails. Waits until the synchronization lock is available.
+    :param subscription: subscription to synchronize
+    :return:
+    """
+    sub_id, sub_name = subscription.id, subscription.name
+
+    # The lock is released when the block is left, also on errors
+    with __lock:
+        log.info("Running synchronization for single subscription %d [%s]", sub_id, sub_name)
+        api = youtube.YoutubeAPI.build_public()
+
+        log.info("Sync - checking videos")
+        __check_new_videos_sub(subscription, api)
+        __detect_deleted(subscription)
+
+        log.info("Sync - checking for videos to download")
+        downloader_process_subscription(subscription)
+
+        log.info("Sync - fetching missing thumbnails")
+        __fetch_thumbnails()
+
+        log.info("Synchronization finished for subscription %d [%s].", subscription.id, subscription.name)
+
+
+def schedule_synchronize_global():
+    """
+    Schedules the synchronize job using the crontab expression stored in the
+    'SynchronizationSchedule' option of the 'global' settings section.
+    :return:
+    """
+    crontab = settings.get('global', 'SynchronizationSchedule')
+    cron_trigger = CronTrigger.from_crontab(crontab)
+    scheduled_job = scheduler.scheduler.add_job(synchronize, cron_trigger, max_instances=1, coalesce=True)
+    log.info('Scheduled synchronize job job=%s', scheduled_job.id)
+
+
+def schedule_synchronize_now():
+    """
+    Adds a synchronize job without a trigger to the scheduler.
+    :return:
+    """
+    job_options = {'max_instances': 1, 'coalesce': True}
+    scheduled_job = scheduler.scheduler.add_job(synchronize, **job_options)
+    log.info('Scheduled synchronize now job job=%s', scheduled_job.id)
+
+
+def schedule_synchronize_now_subscription(subscription: Subscription):
+    """
+    Adds a synchronize_subscription job for the given subscription to the scheduler.
+    :param subscription: subscription passed to synchronize_subscription
+    :return:
+    """
+    job_args = [subscription]
+    scheduled_job = scheduler.scheduler.add_job(synchronize_subscription, args=job_args)
+    log.info('Scheduled synchronize subscription job subscription=(%s), job=%s', subscription, scheduled_job.id)
--- a/app/YtManagerApp/management/subscriptions.py
+++ b/app/YtManagerApp/management/subscriptions.py
--- a/app/YtManagerApp/management/videos.py
+++ b/app/YtManagerApp/management/videos.py
@@ -0,0 +1,57 @@
+import re
+from typing import Optional
+
+from django.contrib.auth.models import User
+from django.db.models import Q
+
+from YtManagerApp.models import Subscription, Video, SubscriptionFolder
+
+
+def get_videos(user: User,
+               sort_order: Optional[str],
+               query: Optional[str] = None,
+               subscription_id: Optional[int] = None,
+               folder_id: Optional[int] = None,
+               only_watched: Optional[bool] = None,
+               only_downloaded: Optional[bool] = None,
+               ):
+    """
+    Builds the query for the videos of a user, optionally filtered and sorted.
+    :param user: only videos from subscriptions of this user are returned
+    :param sort_order: field name passed to order_by; when None no explicit ordering is applied
+    :param query: search text; every word must appear in the video name, description,
+        uploader name or subscription name
+    :param subscription_id: restricts the result to one subscription
+    :param folder_id: restricts the result to the subscriptions found by traversing this folder
+    :param only_watched: when not None, filters by the watched flag
+    :param only_downloaded: when not None, filters by whether a download path is set
+    :return: the filtered Video query set
+    """
+
+    filter_args = []
+    filter_kwargs = {
+        'subscription__user': user
+    }
+
+    # Process query string - basically, we break it down into words,
+    # and then search for the given text in the name, description, uploader name and subscription name
+    if query is not None:
+        for match in re.finditer(r'\w+', query):
+            word = match[0]
+            filter_args.append(Q(name__icontains=word)
+                               | Q(description__icontains=word)
+                               | Q(uploader_name__icontains=word)
+                               | Q(subscription__name__icontains=word))
+
+    # Subscription id
+    if subscription_id is not None:
+        filter_kwargs['subscription_id'] = subscription_id
+
+    # Folder id
+    if folder_id is not None:
+        # Visit function - returns only the subscription IDs
+        def visit(node):
+            if isinstance(node, Subscription):
+                return node.id
+            return None
+        filter_kwargs['subscription_id__in'] = SubscriptionFolder.traverse(folder_id, user, visit)
+
+    # Only watched
+    if only_watched is not None:
+        filter_kwargs['watched'] = only_watched
+
+    # Only downloaded
+    # - not downloaded (False) -> is null (True)
+    # - downloaded (True) -> is not null (False)
+    if only_downloaded is not None:
+        filter_kwargs['downloaded_path__isnull'] = not only_downloaded
+
+    videos = Video.objects.filter(*filter_args, **filter_kwargs)
+
+    # sort_order is declared Optional, but order_by does not accept None as a field name
+    if sort_order is not None:
+        videos = videos.order_by(sort_order)
+
+    return videos