Began work on refactoring the YTSM application.

This commit is contained in:
2019-12-16 22:19:50 +02:00
parent 794b9bd42d
commit fd5d05232f
23 changed files with 475 additions and 347 deletions

View File

@ -1,4 +1,3 @@
from dynamic_preferences.registries import global_preferences_registry
from YtManagerApp.dynamic_preferences_registry import Initialized, YouTubeAPIKey, AllowRegistrations, SyncSchedule, SchedulerConcurrency
@ -35,7 +34,3 @@ class AppConfig(object):
value = subscription.user.preferences[pref]
return value
# Module-level instances: the global preferences manager obtained from the
# dynamic_preferences registry, and the AppConfig object constructed from it.
global_prefs = global_preferences_registry.manager()
appconfig = AppConfig(global_prefs)

View File

@ -1,4 +1,4 @@
from YtManagerApp.management.jobs.download_video import DownloadVideoJob
from YtManagerApp.scheduler.jobs.download_video_job import DownloadVideoJob
from YtManagerApp.models import Video, Subscription, VIDEO_ORDER_MAPPING
from YtManagerApp.utils import first_non_null
from django.conf import settings as srv_settings

View File

@ -1,46 +0,0 @@
import os
from YtManagerApp.models import Video
from YtManagerApp.scheduler import Job, scheduler
class DeleteVideoJob(Job):
    """Scheduler job that removes the files of a video from disk.

    After the files returned by ``video.get_files()`` have been processed,
    the video's ``downloaded_path`` is cleared and the video is saved.
    """

    name = "DeleteVideoJob"

    def __init__(self, job_execution, video: Video):
        super().__init__(job_execution)
        self._video = video

    def get_description(self):
        """Return the human-readable description of this job."""
        return "Deleting video {}".format(self._video)

    def _unlink_logged(self, path):
        """Delete *path*; a failure is logged instead of being propagated."""
        try:
            os.unlink(path)
        except OSError as err:
            self.log.error("Failed to delete file %s: Error: %s", path, err)

    def run(self):
        """Delete every file of the video, then clear its download path."""
        target = self._video

        # Number of files handed to the unlink step (failed deletions included).
        handled = 0
        try:
            for handled, path in enumerate(target.get_files(), start=1):
                self.log.info("Deleting file %s", path)
                self._unlink_logged(path)
        except OSError as err:
            # Raised while enumerating the files themselves.
            self.log.error("Failed to delete video %d [%s %s]. Error: %s", target.id,
                           target.video_id, target.name, err)

        target.downloaded_path = None
        target.save()

        self.log.info('Deleted video %d successfully! (%d files) [%s %s]', target.id, handled,
                      target.video_id, target.name)

    @staticmethod
    def schedule(video: Video):
        """
        Add a DeleteVideoJob for the given video to the scheduler.

        No trigger is passed to ``scheduler.add_job``.

        :param video: video whose files should be deleted
        :return: None
        """
        scheduler.add_job(DeleteVideoJob, args=[video])

View File

@ -1,134 +0,0 @@
import os
import re
from string import Template
from threading import Lock
import youtube_dl
from YtManagerApp.models import Video
from YtManagerApp.scheduler import Job, scheduler
class DownloadVideoJob(Job):
    """Scheduler job that downloads a single video through youtube-dl.

    Downloads are serialised through a class-level lock. When youtube-dl
    reports a non-zero exit code the job re-schedules itself with an
    incremented attempt counter, until the user's ``max_download_attempts``
    preference is exceeded.
    """

    name = "DownloadVideoJob"
    # Shared by all instances so that only one download runs at a time.
    __lock = Lock()

    def __init__(self, job_execution, video: Video, attempt: int = 1):
        """
        :param job_execution: forwarded unchanged to the ``Job`` base class
        :param video: the video to download
        :param attempt: 1-based number of this download attempt
        """
        super().__init__(job_execution)
        self.__video = video
        self.__attempt = attempt
        self.__log_youtube_dl = self.log.getChild('youtube_dl')

    def get_description(self):
        """Return a description of the job, including the attempt number on retries."""
        ret = "Downloading video " + self.__video.name
        if self.__attempt > 1:
            ret += f" (attempt {self.__attempt})"
        return ret

    def run(self):
        """Download the video and record the outcome on the model."""
        # Issue: if multiple videos are downloaded at the same time, a race condition appears in the mkdirs() call that
        # youtube-dl makes, which causes it to fail with the error 'Cannot create folder - file already exists'.
        # For now, allow a single download instance.
        with self.__lock:
            video = self.__video
            user = video.subscription.user
            max_attempts = user.preferences['max_download_attempts']

            youtube_dl_params, output_path = self.__build_youtube_dl_params(video)
            with youtube_dl.YoutubeDL(youtube_dl_params) as yt:
                ret = yt.download(["https://www.youtube.com/watch?v=" + video.video_id])

            self.log.info('Download finished with code %d', ret)

            if ret == 0:
                video.downloaded_path = output_path
                video.save()
                self.log.info('Video %d [%s %s] downloaded successfully!', video.id, video.video_id, video.name)

            # NOTE(review): because the comparison is '<=', a job that failed on
            # attempt == max_attempts is still re-enqueued, so up to
            # max_attempts + 1 downloads are tried - confirm this is intended.
            elif self.__attempt <= max_attempts:
                self.log.warning('Re-enqueueing video (attempt %d/%d)', self.__attempt, max_attempts)
                DownloadVideoJob.schedule(video, self.__attempt + 1)

            else:
                self.log.error('Multiple attempts to download video %d [%s %s] failed!', video.id, video.video_id,
                               video.name)
                video.downloaded_path = ''
                video.save()

    def __build_youtube_dl_params(self, video: Video):
        """Build the youtube-dl options for *video*.

        :return: tuple ``(youtube_dl_params, output_path)`` where ``output_path``
                 is the normalised output template passed as ``outtmpl``
        """
        sub = video.subscription
        user = sub.user

        # resolve path
        download_path = user.preferences['download_path']

        template_dict = self.__build_template_dict(video)
        output_pattern = Template(user.preferences['download_file_pattern']).safe_substitute(template_dict)

        output_path = os.path.normpath(os.path.join(download_path, output_pattern))

        youtube_dl_params = {
            'logger': self.__log_youtube_dl,
            'format': user.preferences['download_format'],
            'outtmpl': output_path,
            'writethumbnail': True,
            'writedescription': True,
            'writesubtitles': user.preferences['download_subtitles'],
            'writeautomaticsub': user.preferences['download_autogenerated_subtitles'],
            'allsubtitles': user.preferences['download_subtitles_all'],
            'merge_output_format': 'mp4',
            'postprocessors': [
                {
                    'key': 'FFmpegMetadata'
                },
            ]
        }

        # ''.split(',') returns [''], so blank entries have to be dropped
        # explicitly; otherwise an empty preference would still produce
        # subtitleslangs == [''].
        raw_langs = user.preferences['download_subtitles_langs']
        sub_langs = [lang.strip() for lang in raw_langs.split(',') if lang.strip()]
        if sub_langs:
            youtube_dl_params['subtitleslangs'] = sub_langs

        sub_format = user.preferences['download_subtitles_format']
        if sub_format:
            youtube_dl_params['subtitlesformat'] = sub_format

        return youtube_dl_params, output_path

    def __build_template_dict(self, video: Video):
        """Return the placeholder values available to the download file pattern."""
        return {
            'channel': video.subscription.channel_name,
            'channel_id': video.subscription.channel_id,
            'playlist': video.subscription.name,
            'playlist_id': video.subscription.playlist_id,
            # playlist_index is shifted by one and zero-padded to three digits
            'playlist_index': "{:03d}".format(1 + video.playlist_index),
            'title': video.name,
            'id': video.video_id,
        }

    def __get_valid_path(self, path):
        """
        Return an ASCII-only version of *path*.

        The string is NFKD-normalised and non-ASCII characters are dropped;
        the characters ``: " *`` are removed, surrounding whitespace is
        stripped, and each of ``? < > |`` is replaced with ``#``.
        """
        import unicodedata
        value = unicodedata.normalize('NFKD', path).encode('ascii', 'ignore').decode('ascii')
        value = re.sub('[:"*]', '', value).strip()
        value = re.sub('[?<>|]', '#', value)
        return value

    @staticmethod
    def schedule(video: Video, attempt: int = 1):
        """
        Add a DownloadVideoJob for the given video to the scheduler.

        No trigger is passed to ``scheduler.add_job``.

        :param video: the video to download
        :param attempt: 1-based attempt number handed to the new job
        :return: None
        """
        scheduler.add_job(DownloadVideoJob, args=[video, attempt])

View File

@ -1,185 +0,0 @@
import errno
import itertools
from threading import Lock
from apscheduler.triggers.cron import CronTrigger
from django.db.models import Max
from django.conf import settings
from YtManagerApp.management.appconfig import appconfig
from YtManagerApp.management.downloader import fetch_thumbnail, downloader_process_subscription
from YtManagerApp.models import *
from YtManagerApp.scheduler import scheduler, Job
from YtManagerApp.utils import youtube
from external.pytaw.pytaw.utils import iterate_chunks
# Switch read by SynchronizeJob.run: when true, each batch of videos is looked
# up through the API (part='id,statistics') so their statistics can be updated.
_ENABLE_UPDATE_STATS = True
class SynchronizeJob(Job):
    """Scheduler job that synchronizes subscriptions and their videos.

    For every subscription in scope the job looks for new playlist items and
    fetches missing thumbnails. For every video it checks whether the
    downloaded files still exist, fetches missing thumbnails and optionally
    refreshes statistics. It then hands each subscription to
    ``downloader_process_subscription``.
    """

    name = "SynchronizeJob"
    # Shared by all instances so that only one synchronization runs at a time.
    __lock = Lock()
    # True while some instance is inside run() and holds the lock.
    running = False
    # Handle returned by scheduler.add_job for the cron-triggered global job.
    __global_sync_job = None

    def __init__(self, job_execution, subscription: Optional[Subscription] = None):
        """
        :param job_execution: forwarded unchanged to the ``Job`` base class
        :param subscription: when given, only this subscription is
                             synchronized; otherwise all subscriptions are
        """
        super().__init__(job_execution)
        self.__subscription = subscription
        self.__api = youtube.YoutubeAPI.build_public()
        # Videos created by check_new_videos during this run.
        self.__new_vids = []

    def get_description(self):
        """Return a description of the job, naming the subscription if there is one."""
        if self.__subscription is not None:
            return "Running synchronization for subscription " + self.__subscription.name
        return "Running synchronization..."

    def get_subscription_list(self):
        """Return the subscriptions this job works on."""
        if self.__subscription is not None:
            return [self.__subscription]
        return Subscription.objects.all()

    def get_videos_list(self, subs):
        """Return the videos that belong to any of the subscriptions in *subs*."""
        return Video.objects.filter(subscription__in=subs)

    def run(self):
        """Run the synchronization while holding the class-level lock."""
        with self.__lock:
            SynchronizeJob.running = True
            try:
                self.log.info(self.get_description())

                # Build list of work items
                work_subs = self.get_subscription_list()
                work_vids = self.get_videos_list(work_subs)
                # NOTE(review): len() evaluates work_vids before update() runs
                # below; the instances iterated later may therefore carry the
                # old 'new' value and write it back on save() - TODO confirm.
                self.set_total_steps(len(work_subs) + len(work_vids))

                # Remove the 'new' flag
                work_vids.update(new=False)

                # Process subscriptions
                for sub in work_subs:
                    self.progress_advance(1, "Synchronizing subscription " + sub.name)
                    self.check_new_videos(sub)
                    self.fetch_missing_thumbnails(sub)

                # Add new videos to progress calculation
                self.set_total_steps(len(work_subs) + len(work_vids) + len(self.__new_vids))

                # Process videos
                all_videos = itertools.chain(work_vids, self.__new_vids)
                for batch in iterate_chunks(all_videos, 50):
                    video_stats = {}

                    if _ENABLE_UPDATE_STATS:
                        batch_ids = [video.video_id for video in batch]
                        # Keyed by the API object's id, which is matched
                        # against video.video_id below.
                        video_stats = {v.id: v for v in self.__api.videos(batch_ids, part='id,statistics')}

                    for video in batch:
                        self.progress_advance(1, "Updating video " + video.name)
                        self.check_video_deleted(video)
                        self.fetch_missing_thumbnails(video)

                        if video.video_id in video_stats:
                            self.update_video_stats(video, video_stats[video.video_id])

                # Start downloading videos
                for sub in work_subs:
                    downloader_process_subscription(sub)

            finally:
                SynchronizeJob.running = False

    def check_new_videos(self, sub: Subscription):
        """Create a Video for every playlist item of *sub* that is not stored yet.

        The created videos are appended to the job's list of new videos.
        """
        playlist_items = self.__api.playlist_items(sub.playlist_id)
        if sub.rewrite_playlist_indices:
            playlist_items = sorted(playlist_items, key=lambda x: x.published_at)
        else:
            playlist_items = sorted(playlist_items, key=lambda x: x.position)

        for item in playlist_items:
            results = Video.objects.filter(video_id=item.resource_video_id, subscription=sub)
            if results.exists():
                continue

            self.log.info('New video for subscription %s: %s %s', sub, item.resource_video_id, item.title)

            # fix playlist index if necessary: either indices are rewritten for
            # this subscription, or the item's position is already taken
            if sub.rewrite_playlist_indices or Video.objects.filter(subscription=sub, playlist_index=item.position).exists():
                highest = Video.objects.filter(subscription=sub).aggregate(Max('playlist_index'))['playlist_index__max']
                item.position = 1 + (highest or -1)

            self.__new_vids.append(Video.create(item, sub))

    def fetch_missing_thumbnails(self, obj: Union[Subscription, Video]):
        """Replace a thumbnail that still starts with "http" by the result of fetch_thumbnail."""
        if not obj.thumbnail.startswith("http"):
            return

        if isinstance(obj, Subscription):
            obj.thumbnail = fetch_thumbnail(obj.thumbnail, 'sub', obj.playlist_id, settings.THUMBNAIL_SIZE_SUBSCRIPTION)
        elif isinstance(obj, Video):
            obj.thumbnail = fetch_thumbnail(obj.thumbnail, 'video', obj.video_id, settings.THUMBNAIL_SIZE_VIDEO)
        obj.save()

    def check_video_deleted(self, video: Video):
        """Detect a downloaded video whose video file is gone and clean up after it.

        When none of the video's files has a ``video/*`` MIME type, the
        remaining files are deleted, ``downloaded_path`` is cleared and the
        video is optionally marked as watched.
        """
        if video.downloaded_path is None:
            return

        files = []
        try:
            files = list(video.get_files())
        except OSError as e:
            # ENOENT is not treated as an error: 'files' stays empty and the
            # video is handled as deleted below.
            if e.errno != errno.ENOENT:
                self.log.error("Could not access path %s. Error: %s", video.downloaded_path, e)
                self.usr_err(f"Could not access path {video.downloaded_path}: {e}", suppress_notification=True)
                return

        # Try to find a valid video file
        found_video = False
        for file in files:
            mime, _ = mimetypes.guess_type(file)
            if mime is not None and mime.startswith("video"):
                found_video = True
                break

        if found_video:
            return

        # Video not found, we can safely assume that the video was deleted.
        self.log.info("Video %d was deleted! [%s %s]", video.id, video.video_id, video.name)

        # Clean up
        for file in files:
            try:
                os.unlink(file)
            except OSError as e:
                self.log.error("Could not delete redundant file %s. Error: %s", file, e)
                self.usr_err(f"Could not delete redundant file {file}: {e}", suppress_notification=True)
        video.downloaded_path = None

        # Mark watched?
        user = video.subscription.user
        if user.preferences['mark_deleted_as_watched']:
            video.watched = True

        video.save()

    def update_video_stats(self, video: Video, yt_video):
        """Copy the view count and, when votes exist, the like ratio from *yt_video*."""
        if yt_video.n_likes is not None \
                and yt_video.n_dislikes is not None \
                and yt_video.n_likes + yt_video.n_dislikes > 0:
            video.rating = yt_video.n_likes / (yt_video.n_likes + yt_video.n_dislikes)

        video.views = yt_video.n_views
        video.save()

    @staticmethod
    def schedule_global_job():
        """Create the cron-triggered global job, or reschedule it if it already exists."""
        trigger = CronTrigger.from_crontab(appconfig.sync_schedule)

        if SynchronizeJob.__global_sync_job is None:
            SynchronizeJob.__global_sync_job = scheduler.add_job(SynchronizeJob, trigger, max_instances=1, coalesce=True)
        else:
            SynchronizeJob.__global_sync_job.reschedule(trigger, max_instances=1, coalesce=True)

    @staticmethod
    def schedule_now():
        """Add a synchronization job for all subscriptions, without a trigger."""
        scheduler.add_job(SynchronizeJob, max_instances=1, coalesce=True)

    @staticmethod
    def schedule_now_for_subscription(subscription):
        """Add a synchronization job limited to *subscription*, without a trigger."""
        scheduler.add_job(SynchronizeJob, user=subscription.user, args=[subscription])

View File

@ -0,0 +1,111 @@
import logging
import os
import subprocess
import sys
import requests
from django.conf import settings as dj_settings
# URL the youtube-dl script is downloaded from by YoutubeDlManager.install.
LATEST_URL = "https://yt-dl.org/downloads/latest/youtube-dl"
# GitHub API endpoint; the 'tag_name' field of its JSON response is used as
# the latest available version.
GITHUB_API_LATEST_RELEASE = "https://api.github.com/repos/ytdl-org/youtube-dl/releases/latest"
# Logger used by YoutubeDlManager.
log = logging.getLogger("YoutubeDlManager")
class YoutubeDlException(Exception):
    """Common base class of the youtube-dl errors defined in this module."""
class YoutubeDlNotInstalledException(YoutubeDlException):
    """Raised by YoutubeDlManager.run when no executable youtube-dl file is found."""
class YoutubeDlRuntimeException(YoutubeDlException):
    """Raised by YoutubeDlManager.run when youtube-dl exits with a non-zero code."""
class YoutubeDlManager(object):
    """Installs, upgrades and runs a local copy of the youtube-dl script.

    The script is stored as ``youtube-dl`` inside ``DATA_DIR`` (from the
    Django settings) and is executed with the current Python interpreter.
    """

    def __init__(self):
        # When true, '-v' is passed to youtube-dl.
        self.verbose = False
        # When true, '--newline' is passed; otherwise '--no-progress'.
        self.progress = False

    def _get_path(self):
        """Return the path of the managed youtube-dl script."""
        return os.path.join(dj_settings.DATA_DIR, 'youtube-dl')

    def _check_installed(self, path):
        """Return True when *path* is an existing file the process may execute."""
        return os.path.isfile(path) and os.access(path, os.X_OK)

    def _get_run_args(self):
        """Return the command-line flags derived from ``verbose`` and ``progress``."""
        run_args = []
        if self.verbose:
            run_args.append('-v')
        run_args.append('--newline' if self.progress else '--no-progress')
        return run_args

    def _activate_download(self, tmp_path, path):
        """Move the downloaded file *tmp_path* to *path* and make it executable."""
        # os.replace overwrites an existing target and also works when there
        # is none yet (first install), unlike unlink-then-rename.
        os.replace(tmp_path, path)
        # The mode must be octal: r-xr-xr-x. Decimal 555 is 0o1053, which
        # gives the owner no permissions at all.
        os.chmod(path, 0o555)

    def run(self, *args):
        """Run youtube-dl with *args* and return its decoded standard output.

        Standard output is logged at INFO level and standard error at ERROR
        level, when non-empty.

        :raises YoutubeDlNotInstalledException: the script is missing or not executable
        :raises YoutubeDlRuntimeException: youtube-dl exited with a non-zero code
        """
        path = self._get_path()
        if not self._check_installed(path):
            log.error("Cannot run youtube-dl, it is not installed!")
            raise YoutubeDlNotInstalledException

        run_args = self._get_run_args()
        ret = subprocess.run([sys.executable, path, *run_args, *args],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        stdout = ret.stdout.decode('utf-8')
        if stdout:
            log.info("YoutubeDL: %s", stdout)

        stderr = ret.stderr.decode('utf-8')
        if stderr:
            log.error("YoutubeDL: %s", stderr)

        if ret.returncode != 0:
            raise YoutubeDlRuntimeException()

        return stdout

    def get_installed_version(self):
        """Return the output of ``youtube-dl --version`` without surrounding whitespace."""
        # The raw output ends with a newline; without stripping it the version
        # never compares equal to the release tag in install().
        return self.run('--version').strip()

    def get_latest_version(self):
        """Return the ``tag_name`` of the latest release reported by the GitHub API."""
        resp = requests.get(GITHUB_API_LATEST_RELEASE, allow_redirects=True)
        resp.raise_for_status()

        info = resp.json()
        return info['tag_name']

    def install(self):
        """Download youtube-dl unless the installed version already matches the latest one."""
        # Check if we are running the latest version
        latest = self.get_latest_version()
        try:
            current = self.get_installed_version()
        except YoutubeDlNotInstalledException:
            current = None
        if latest == current:
            log.info(f"Running latest youtube-dl version ({current})!")
            return

        # Download latest
        resp = requests.get(LATEST_URL, allow_redirects=True, stream=True)
        resp.raise_for_status()

        path = self._get_path()
        tmp_path = path + ".tmp"
        with open(tmp_path, "wb") as f:
            for chunk in resp.iter_content(10 * 1024):
                f.write(chunk)

        # Replace
        self._activate_download(tmp_path, path)

        # Test run
        newver = self.get_installed_version()
        if current is None:
            log.info(f"Installed youtube-dl version {newver}.")
        else:
            log.info(f"Upgraded youtube-dl from version {current} to {newver}.")