2018-10-21 01:20:31 +03:00
|
|
|
import errno
|
2019-08-14 17:14:16 +03:00
|
|
|
import itertools
|
2019-11-22 16:58:36 +00:00
|
|
|
import datetime
|
2018-10-27 03:33:45 +03:00
|
|
|
from threading import Lock
|
|
|
|
|
2019-11-30 13:01:44 +00:00
|
|
|
import requests
|
|
|
|
from xml.etree import ElementTree
|
2018-10-27 03:33:45 +03:00
|
|
|
from apscheduler.triggers.cron import CronTrigger
|
2019-11-22 16:58:36 +00:00
|
|
|
from django.db.models import Max, F
|
2019-08-19 21:05:13 +03:00
|
|
|
from django.conf import settings
|
2018-10-11 01:43:50 +03:00
|
|
|
|
2018-12-29 17:11:20 +02:00
|
|
|
from YtManagerApp.management.appconfig import appconfig
|
2019-08-14 17:14:16 +03:00
|
|
|
from YtManagerApp.management.downloader import fetch_thumbnail, downloader_process_subscription
|
2018-10-13 23:01:45 +03:00
|
|
|
from YtManagerApp.models import *
|
2019-08-14 17:14:16 +03:00
|
|
|
from YtManagerApp.scheduler import scheduler, Job
|
2018-10-29 18:52:09 +02:00
|
|
|
from YtManagerApp.utils import youtube
|
2019-08-14 17:14:16 +03:00
|
|
|
from external.pytaw.pytaw.utils import iterate_chunks
|
2018-10-11 01:43:50 +03:00
|
|
|
|
2018-10-29 22:45:12 +02:00
|
|
|
# Controls which videos SynchronizeJob.run asks the YouTube API about:
# when True, every video in a batch is queried; when False, only videos
# whose stored duration is 0 are queried.
_ENABLE_UPDATE_STATS = True
|
2018-10-27 03:33:45 +03:00
|
|
|
|
2018-10-11 01:43:50 +03:00
|
|
|
|
2019-08-14 17:14:16 +03:00
|
|
|
class SynchronizeJob(Job):
    """Scheduler job that synchronizes subscriptions with YouTube.

    A run looks for new videos in each subscription (through the channel RSS
    feed or the full playlist listing), downloads missing thumbnails, detects
    downloaded videos whose files have disappeared, refreshes video
    statistics, and finally hands each subscription to the downloader.

    When constructed without a subscription the job processes every
    subscription; otherwise only the given one.
    """

    name = "SynchronizeJob"

    # Serializes runs: only one synchronization executes at a time.
    __lock = Lock()

    # True while some instance is inside run().
    running = False

    # Handle returned by scheduler.add_job for the cron-triggered global job,
    # kept so that schedule_global_job can reschedule it later.
    __global_sync_job = None

    # Seconds to wait for the RSS feed. Without a timeout `requests` waits
    # indefinitely, which would keep the synchronization lock held forever.
    _RSS_TIMEOUT_SECONDS = 30

    # XML namespace prefixes (ElementTree "{uri}tag" notation) used by the feed.
    _NS_ATOM = "{http://www.w3.org/2005/Atom}"
    _NS_YT = "{http://www.youtube.com/xml/schemas/2015}"
    _NS_MEDIA = "{http://search.yahoo.com/mrss/}"

    def __init__(self, job_execution, subscription: Optional[Subscription] = None):
        """Create the job.

        :param job_execution: passed through unchanged to ``Job.__init__``.
        :param subscription: the only subscription to synchronize, or ``None``
            to synchronize all subscriptions.
        """
        super().__init__(job_execution)
        self.__subscription = subscription
        self.__api = youtube.YoutubeAPI.build_public()
        # Videos created during this run; filled in by the check_* methods.
        self.__new_videos = []

    def get_description(self):
        """Return a human readable description of what this job is doing."""
        if self.__subscription is not None:
            return "Running synchronization for subscription " + self.__subscription.name
        return "Running synchronization..."

    def get_subscription_list(self):
        """Return the subscriptions this job must process.

        :return: a one-element list when the job was created for a single
            subscription, otherwise a queryset over all subscriptions ordered
            by ``last_synchronised`` descending with NULL values first.
        """
        if self.__subscription is not None:
            return [self.__subscription]
        return Subscription.objects.all().order_by(F('last_synchronised').desc(nulls_first=True))

    def run(self):
        """Execute the synchronization.

        Blocks until no other SynchronizeJob is running. Exceptions raised by
        the individual steps propagate to the caller; the ``running`` flag is
        reset and the lock released in every case.
        """
        with self.__lock:
            SynchronizeJob.running = True
            try:
                self.log.info(self.get_description())

                # Build list of work items
                work_subs = self.get_subscription_list()
                work_videos = Video.objects.filter(subscription__in=work_subs)
                self.set_total_steps(len(work_subs) + len(work_videos))

                # Remove the 'new' flag
                work_videos.update(new=False)

                # Process subscriptions
                for sub in work_subs:
                    self.progress_advance(progress_msg="Synchronizing subscription " + sub.name)
                    self.check_new_videos(sub)
                    self.fetch_missing_thumbnails(sub)

                # Add new videos to progress calculation
                # NOTE(review): work_videos is a queryset; if it is evaluated
                # again at this point it may already contain the videos saved
                # above, in which case they would be visited a second time
                # through __new_videos - confirm.
                self.set_total_steps(len(work_subs) + len(work_videos) + len(self.__new_videos))

                # Process videos
                all_videos = itertools.chain(work_videos, self.__new_videos)
                for batch in iterate_chunks(all_videos, 50):
                    if _ENABLE_UPDATE_STATS:
                        batch_ids = [video.video_id for video in batch]
                    else:
                        # Only query videos whose duration is still unknown (0).
                        batch_ids = [video.video_id for video in batch if video.duration == 0]
                    video_stats = {v.id: v for v in self.__api.videos(batch_ids, part='id,statistics,contentDetails')}

                    for video in batch:
                        self.progress_advance(progress_msg="Updating video " + video.name)
                        self.check_video_deleted(video)
                        self.fetch_missing_thumbnails(video)

                        if video.video_id in video_stats:
                            self.update_video_stats(video, video_stats[video.video_id])

                # Start downloading videos
                for sub in work_subs:
                    downloader_process_subscription(sub)

            finally:
                SynchronizeJob.running = False

    def check_new_videos(self, sub: Subscription):
        """Look for new videos in ``sub`` and record the synchronization time.

        A subscription that was never synchronized is scanned through its
        full playlist; otherwise the RSS feed is used.
        """
        if sub.last_synchronised is None:
            self.check_all_videos(sub)
        else:
            self.check_rss_videos(sub)
        sub.last_synchronised = datetime.datetime.now()
        sub.save()

    def check_rss_videos(self, sub: Subscription):
        """Create Video rows for feed entries that are not in the database yet.

        If none of the feed entries is already known, the full playlist is
        scanned as well through ``check_all_videos``.

        :raises requests.RequestException: when the feed cannot be fetched,
            the request times out, or the server answers with an error status.
        """
        found_existing_video = False

        rss_request = requests.get(
            "https://www.youtube.com/feeds/videos.xml?channel_id=" + sub.channel_id,
            timeout=self._RSS_TIMEOUT_SECONDS)
        rss_request.raise_for_status()

        atom, yt, media = self._NS_ATOM, self._NS_YT, self._NS_MEDIA

        rss = ElementTree.fromstring(rss_request.content)
        for entry in rss.findall(atom + "entry"):
            video_id = entry.find(yt + "videoId").text
            if Video.objects.filter(video_id=video_id, subscription=sub).exists():
                found_existing_video = True
                continue

            video_title = entry.find(atom + "title").text
            self.log.info('New video for subscription %s: %s %s', sub, video_id, video_title)

            # Most of the metadata lives under <media:group>.
            media_group = entry.find(media + "group")
            community = media_group.find(media + "community")

            video = Video()
            video.video_id = video_id
            video.name = video_title
            # The description element may be empty, in which case .text is None.
            video.description = media_group.find(media + "description").text or ""
            video.watched = False
            video.new = True
            video.downloaded_path = None
            video.subscription = sub
            video.playlist_index = 0
            video.publish_date = datetime.datetime.fromisoformat(entry.find(atom + "published").text)
            video.thumbnail = media_group.find(media + "thumbnail").get("url")
            # Attribute values are stored exactly as the feed delivers them.
            # NOTE(review): the feed's star rating average looks like a
            # different scale than the like ratio written by
            # update_video_stats - confirm.
            video.rating = community.find(media + "starRating").get("average")
            video.views = community.find(media + "statistics").get("views")
            video.save()

            self.__new_videos.append(video)

        if not found_existing_video:
            self.check_all_videos(sub)

    def check_all_videos(self, sub: Subscription):
        """Create Video rows for every playlist item not in the database yet.

        Items are processed in publication order when
        ``sub.rewrite_playlist_indices`` is set, otherwise in playlist
        position order.
        """
        playlist_items = self.__api.playlist_items(sub.playlist_id)
        if sub.rewrite_playlist_indices:
            playlist_items = sorted(playlist_items, key=lambda x: x.published_at)
        else:
            playlist_items = sorted(playlist_items, key=lambda x: x.position)

        for item in playlist_items:
            if Video.objects.filter(video_id=item.resource_video_id, subscription=sub).exists():
                continue

            self.log.info('New video for subscription %s: %s %s', sub, item.resource_video_id, item.title)

            # fix playlist index if necessary
            if sub.rewrite_playlist_indices or Video.objects.filter(subscription=sub, playlist_index=item.position).exists():
                highest = Video.objects.filter(subscription=sub).aggregate(Max('playlist_index'))['playlist_index__max']
                # The aggregate is None only when the subscription has no
                # videos. An existing maximum of 0 is a real index and must
                # be followed by 1 (`highest or -1` handed out 0 again).
                item.position = 0 if highest is None else highest + 1

            self.__new_videos.append(Video.create(item, sub))

    @staticmethod
    def fetch_missing_thumbnails(obj: Union[Subscription, Video]):
        """Replace a remote thumbnail URL by the value ``fetch_thumbnail`` returns.

        Nothing happens when ``obj.thumbnail`` does not start with ``"http"``.

        :param obj: subscription or video whose thumbnail should be fetched.
        """
        if not obj.thumbnail.startswith("http"):
            return

        if isinstance(obj, Subscription):
            obj.thumbnail = fetch_thumbnail(obj.thumbnail, 'sub', obj.playlist_id, settings.THUMBNAIL_SIZE_SUBSCRIPTION)
        elif isinstance(obj, Video):
            obj.thumbnail = fetch_thumbnail(obj.thumbnail, 'video', obj.video_id, settings.THUMBNAIL_SIZE_VIDEO)
        obj.save()

    def check_video_deleted(self, video: Video):
        """Detect a downloaded video whose video file no longer exists.

        When no file with a ``video/*`` mime type is found among the video's
        files, the remaining files are removed, ``downloaded_path`` is
        cleared and, depending on the owner's ``mark_deleted_as_watched``
        preference, the video is marked as watched.
        """
        if video.downloaded_path is None:
            return

        files = []
        try:
            files = list(video.get_files())
        except OSError as e:
            # ENOENT is not reported as an error: the file list stays empty
            # and the video is handled as deleted below.
            if e.errno != errno.ENOENT:
                self.log.error("Could not access path %s. Error: %s", video.downloaded_path, e)
                self.usr_err(f"Could not access path {video.downloaded_path}: {e}", suppress_notification=True)
                return

        # Try to find a valid video file
        found_video = False
        for file in files:
            mime, _ = mimetypes.guess_type(file)
            if mime is not None and mime.startswith("video"):
                found_video = True
                break  # one video file is enough

        if found_video:
            return

        # Video not found, we can safely assume that the video was deleted.
        self.log.info("Video %d was deleted! [%s %s]", video.id, video.video_id, video.name)

        # Clean up
        for file in files:
            try:
                os.unlink(file)
            except OSError as e:
                self.log.error("Could not delete redundant file %s. Error: %s", file, e)
                self.usr_err(f"Could not delete redundant file {file}: {e}", suppress_notification=True)
        video.downloaded_path = None

        # Mark watched?
        user = video.subscription.user
        if user.preferences['mark_deleted_as_watched']:
            video.watched = True

        video.save()

    @staticmethod
    def update_video_stats(video: Video, yt_video):
        """Copy rating, view count and duration from ``yt_video`` and save.

        The rating is the fraction of likes among likes plus dislikes; it is
        left untouched when either count is missing or both are zero.

        :param video: database row to update.
        :param yt_video: API result providing ``n_likes``, ``n_dislikes``,
            ``n_views`` and ``duration``.
        """
        likes, dislikes = yt_video.n_likes, yt_video.n_dislikes
        if likes is not None and dislikes is not None and likes + dislikes > 0:
            video.rating = likes / (likes + dislikes)

        video.views = yt_video.n_views
        video.duration = yt_video.duration.total_seconds()
        video.save()

    @staticmethod
    def schedule_global_job():
        """Schedule, or reschedule, the periodic synchronization of everything.

        The cron expression is read from ``appconfig.sync_schedule`` on every
        call, so calling this again applies a changed schedule.
        """
        trigger = CronTrigger.from_crontab(appconfig.sync_schedule)

        if SynchronizeJob.__global_sync_job is None:
            SynchronizeJob.__global_sync_job = scheduler.add_job(SynchronizeJob, trigger, max_instances=1, coalesce=True)
        else:
            SynchronizeJob.__global_sync_job.reschedule(trigger, max_instances=1, coalesce=True)

    @staticmethod
    def schedule_now():
        """Queue a synchronization of all subscriptions without a trigger."""
        scheduler.add_job(SynchronizeJob, max_instances=1, coalesce=True)

    @staticmethod
    def schedule_now_for_subscription(subscription):
        """Queue a synchronization of a single subscription for its user.

        :param subscription: the subscription to synchronize.
        """
        scheduler.add_job(SynchronizeJob, user=subscription.user, args=[subscription])
|