# typed: false

class Domain::Fa::Job::Base < Scraper::JobBase
|
|
# Drop the job (no retry) when ActiveJob raises a DeserializationError while
# restoring its arguments.
# NOTE(review): presumably this covers records that were deleted after the job
# was enqueued - confirm that silently discarding is the desired outcome.
discard_on ActiveJob::DeserializationError
|
|
|
|
# Name of the factory method that supplies the HTTP client for these jobs.
# NOTE(review): presumably looked up by Scraper::JobBase - confirm against the
# base class.
#
# @return [Symbol] always :get_fa_http_client
def self.http_factory_method
  :get_fa_http_client
end
|
|
|
|
# Instance methods defined below this marker are protected: they are helpers
# for this class and its subclasses, not part of the public job interface.
protected
|
|
|
|
# Initialises per-job state (@force_scan, @caused_by_entry, @user) from the job
# arguments and sets the logger prefix to describe the user being processed.
#
# @param args [Hash] job arguments; reads :force_scan and :caused_by_entry
#   directly, and :user / :url_name through the user lookup helpers
# @param build_user [Boolean] when true an unsaved user is built if the lookup
#   finds nothing; when false @user is left nil in that case
# @param require_user_exists [Boolean] when true, a user that is still a new
#   record is reported through fatal_error instead of being saved
# @return [nil, Object] nil when no user could be resolved; otherwise the value
#   is incidental and should not be relied upon
def init_from_args!(args, build_user: true, require_user_exists: false)
  @force_scan = !!args[:force_scan]
  @caused_by_entry = args[:caused_by_entry]

  @user =
    if build_user
      find_or_build_user_from_args(args, caused_by_entry: @caused_by_entry)
    else
      find_user_from_args(args)
    end

  # Set the prefix before the nil check so that log lines still name the
  # requested url_name when no user record was found.
  logger.prefix =
    "[user #{(@user&.url_name || @user&.name || args[:url_name])&.bold} / #{@user&.state&.bold}]"

  return nil unless @user
  return unless @user.new_record?

  if require_user_exists
    fatal_error("user must already exist")
  else
    @user.save!
    @created_user = true
  end
end
|
|
|
|
# Returns the user identified by the job arguments, or builds an unsaved
# Domain::Fa::User for args[:url_name] when the lookup finds nothing.
#
# @param args [Hash] job arguments; see find_user_from_args for the lookup keys
# @param caused_by_entry [Object, nil] when given, its id is recorded on a
#   newly built user under state_detail["first_seen_entry"]
# @return [Object] the found user, or a new unsaved user whose url_name and
#   name are both set to the normalised url_name
def find_or_build_user_from_args(args, caused_by_entry: nil)
  existing_user = find_user_from_args(args)
  return existing_user if existing_user

  url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
  user = Domain::Fa::User.new
  user.url_name = url_name
  user.name = url_name
  user.state_detail ||= {}
  user.state_detail["first_seen_entry"] = caused_by_entry.id if caused_by_entry
  user
end
|
|
|
|
# Resolves the user a job refers to.
#
# @param args [Hash] job arguments; args[:user] is returned as-is when set,
#   otherwise args[:url_name] is normalised and looked up
# @return [Object, nil] the user, or nil when no record has that url_name
# @note Reports through fatal_error when neither :user nor a non-blank
#   :url_name is supplied.
def find_user_from_args(args)
  given_user = args[:user]
  return given_user if given_user

  if args[:url_name].blank?
    fatal_error("arg 'url_name' is required if arg 'user' is nil")
  end

  url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
  Domain::Fa::User.find_by(url_name: url_name)
end
|
|
|
|
# Decides whether the current @user should be scanned for +scan_type+.
#
# A user that is due is always scanned. A user that is not due is scanned only
# when @force_scan is set; either way a warning records how long ago the last
# scan happened and which decision was taken.
#
# @param scan_type [Object] passed through to @user.scan_due? and
#   @user.scanned_ago_in_words
# @return [Boolean] true when the scan should proceed
def user_due_for_scan?(scan_type)
  return true if @user.scan_due?(scan_type)

  forced = @force_scan ? true : false
  outcome = forced ? "force scanning" : "skipping"
  logger.warn(
    "scanned #{@user.scanned_ago_in_words(scan_type).bold} - #{outcome}",
  )
  forced
end
|
|
|
|
# Counters returned by update_and_enqueue_posts_from_listings_page:
#   new_seen     - submissions on the page that had no post record yet
#   total_seen   - all submissions processed on the page
#   last_was_new - whether the last processed submission was a new record
ListingsPageScanStats =
  Struct.new(
    :new_seen,
    :total_seen,
    :last_was_new,
  )
|
|
|
|
# Processes one parsed listings page: creates/updates a post per submission,
# enqueues user and post scans for them, and optionally enqueues scans for
# fa_ids that were not on the page.
#
# @param job_type [Symbol] forwarded to update_and_save_post_from_listings_page
# @param page [Object] parsed page; must answer probably_listings_page? and
#   submissions_parsed
# @param caused_by_entry [Object] forwarded to every helper that enqueues or
#   saves
# @param enqueue_posts_pri [Symbol, nil] priority forwarded to the post scan
#   helpers
# @param enqueue_page_scan [Boolean] forwarded to enqueue_user_scan
# @param enqueue_gallery_scan [Boolean] forwarded to enqueue_user_scan
# @param page_desc [Object, nil] label used in the summary log line
# @param fill_id_gaps [Boolean] when true, fa_ids missing between the smallest
#   and largest id on the page get a post record created and scanned (only
#   when that span is at most 250)
# @param continue_for [Integer, nil] when set, fa_ids in the +continue_for+
#   below the largest id on the page that are neither on the page nor already
#   stored are enqueued by fa_id
# @return [ListingsPageScanStats] counters for the submissions on the page
def update_and_enqueue_posts_from_listings_page(
  job_type,
  page,
  caused_by_entry,
  enqueue_posts_pri:,
  enqueue_page_scan: true,
  enqueue_gallery_scan: true,
  page_desc: nil,
  fill_id_gaps: false,
  continue_for: nil
)
  fatal_error("not a listings page") unless page.probably_listings_page?
  submissions = page.submissions_parsed

  fa_ids_to_manually_enqueue = Set.new
  fa_ids = Set.new(submissions.map(&:id))

  create_unseen_posts = false

  if fill_id_gaps && submissions.any?
    create_unseen_posts = true
    min_fa_id, max_fa_id = fa_ids.minmax
    # sanity check so we don't enqueue too many post jobs
    if max_fa_id - min_fa_id <= 250
      fa_ids_to_manually_enqueue.merge(min_fa_id..max_fa_id)
      fa_ids_to_manually_enqueue.subtract(fa_ids)
    end
  end

  # NOTE(review): this branch replaces (rather than extends) any ids collected
  # by fill_id_gaps above while create_unseen_posts stays true - confirm that
  # combining both options is either unsupported or intended to work this way.
  if continue_for && submissions.any?
    max_fa_id = fa_ids.max
    min_fa_id = [max_fa_id - continue_for, 0].max
    fa_ids_to_manually_enqueue = Set.new(min_fa_id..max_fa_id)
    fa_ids_to_manually_enqueue.subtract(fa_ids)
    # posts already stored in this id range do not need a blind fa_id scan
    existing =
      Domain::Fa::Post.where(
        "fa_id >= ? AND fa_id <= ?",
        min_fa_id,
        max_fa_id,
      ).pluck(:fa_id)
    fa_ids_to_manually_enqueue.subtract(existing)
  end

  page_desc = (page_desc ? "page #{page_desc.to_s.bold}" : "page")

  listing_page_stats = ListingsPageScanStats.new(0, 0, false)
  submissions.each do |submission|
    post = Domain::Fa::Post.find_or_initialize_by(fa_id: submission.id)
    # capture newness before the post is saved below
    is_new = post.new_record?
    listing_page_stats.last_was_new = is_new
    listing_page_stats.new_seen += 1 if is_new
    listing_page_stats.total_seen += 1

    update_and_save_post_from_listings_page(
      job_type,
      post,
      submission,
      caused_by_entry,
    )
    if post.creator
      enqueue_user_scan(
        post.creator,
        caused_by_entry,
        enqueue_page_scan: enqueue_page_scan,
        enqueue_gallery_scan: enqueue_gallery_scan,
      )
    end

    case post.state.to_sym
    when :ok
      enqueue_post_scan(post, caused_by_entry, enqueue_posts_pri)
    when :removed
      logger.info "(todo) removed post seen in listing page, enqueue scan for fa_id #{post.fa_id}"
    when :scan_error
      logger.info "(todo) scan_error'd post seen in listing page for fa_id #{post.fa_id}"
    when :file_error
      logger.info "(todo) file_error'd post seen in listing page for fa_id #{post.fa_id}"
    else
      logger.info "unknown post state `#{post.state}` for fa_id #{post.fa_id}"
    end
  end

  # highest fa_id first
  fa_ids_to_manually_enqueue.sort.reverse_each do |fa_id|
    if create_unseen_posts
      # when filling gaps, only enqueue if the post wasn't found
      post = Domain::Fa::Post.find_or_initialize_by(fa_id: fa_id)
      if post.new_record?
        post.save!
        enqueue_post_scan(post, caused_by_entry, enqueue_posts_pri)
      end
    else
      enqueue_fa_id_scan(fa_id, caused_by_entry, enqueue_posts_pri)
    end
  end

  logger.info "#{page_desc} has #{submissions.count.to_s.bold} posts, " \
                "#{listing_page_stats.new_seen.to_s.bold} new"

  listing_page_stats
end
|
|
|
|
# Copies listing-page data onto +post+ and saves it.
#
# Records the id of +caused_by_entry+ as the first browse/gallery page the post
# was seen on (an already recorded id is kept), fills in the creator if
# missing, and overwrites title and thumbnail from the submission.
#
# @param job_type [Symbol] :browse_page or :gallery_page; anything else is
#   reported through fatal_error
# @param post [Object] post record to update; saved with save!
# @param submission [Object] parsed submission; must provide title and
#   thumb_path (a nil value for either is reported through fatal_error)
# @param caused_by_entry [Object] entry whose id is recorded on the post
# @return [Object] the result of post.save!
def update_and_save_post_from_listings_page(
  job_type,
  post,
  submission,
  caused_by_entry
)
  case job_type
  when :browse_page
    post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
  when :gallery_page
    post.log_entry_detail["first_gallery_page_id"] ||= caused_by_entry.id
  else
    fatal_error("unhandled job_type: #{job_type}")
  end

  post.creator ||=
    Domain::Fa::User.find_or_build_from_submission_parser(submission)
  # NOTE(review): `||` only catches nil here, so an empty-string title or
  # thumb_path passes despite the "blank" wording - confirm that is intended.
  post.title = submission.title || fatal_error("blank title")
  post.thumbnail_uri =
    submission.thumb_path || fatal_error("blank thumb_path")
  post.save!
end
|
|
|
|
# Enqueues page / gallery / favs scan jobs for +user+, each at most once per
# url_name for the lifetime of this job instance and only when the user
# reports being due for that scan.
#
# A url_name is remembered as handled as soon as it is checked, even when the
# user turns out not to be due, so the due check runs once per url_name.
#
# @param user [Object] user record; a persisted user is passed to the jobs
#   directly, an unpersisted one is passed by url_name
# @param caused_by_entry [Object] forwarded to the enqueued jobs
# @param enqueue_page_scan [Boolean] consider a Domain::Fa::Job::UserPageJob
# @param enqueue_gallery_scan [Boolean] consider a
#   Domain::Fa::Job::UserGalleryJob
# @param enqueue_favs_scan [Boolean] consider a Domain::Fa::Job::FavsJob
# @return [Object, nil] not meaningful; nil when the user is skipped because
#   it is unpersisted and has no url_name
def enqueue_user_scan(
  user,
  caused_by_entry,
  enqueue_page_scan: true,
  enqueue_gallery_scan: true,
  enqueue_favs_scan: true
)
  @users_enqueued_for_page_scan ||= Set.new
  @users_enqueued_for_gallery_scan ||= Set.new
  @users_enqueued_for_favs_scan ||= Set.new

  user_ref =
    if user.persisted?
      { user: user }
    elsif user.url_name
      { url_name: user.url_name }
    else
      logger.warn "user does not have a url name and is not persisted, skipping (#{user.name})"
      return
    end
  args = user_ref.merge({ caused_by_entry: caused_by_entry })

  if enqueue_page_scan &&
       @users_enqueued_for_page_scan.add?(user.url_name) &&
       user.due_for_page_scan?
    defer_user_scan_job(
      user,
      "page",
      user.scanned_page_at,
      Domain::Fa::Job::UserPageJob,
      args,
    )
  end

  if enqueue_gallery_scan &&
       @users_enqueued_for_gallery_scan.add?(user.url_name) &&
       user.due_for_gallery_scan?
    defer_user_scan_job(
      user,
      "gallery",
      user.scanned_gallery_at,
      Domain::Fa::Job::UserGalleryJob,
      args,
    )
  end

  if enqueue_favs_scan &&
       @users_enqueued_for_favs_scan.add?(user.url_name) &&
       user.due_for_favs_scan?
    defer_user_scan_job(
      user,
      "favs",
      user.scanned_favs_at,
      Domain::Fa::Job::FavsJob,
      args,
    )
  end
end

# Logs and defers a single user scan job; shared by the three scan kinds
# handled in enqueue_user_scan.
def defer_user_scan_job(user, scan_label, last_scanned_at, job_class, args)
  logger.info(
    "enqueue user #{scan_label} job for #{user.url_name.bold}, " \
      "last scanned #{time_ago_in_words(last_scanned_at)}",
  )
  defer_job(job_class, args)
end
|
|
|
|
# Translates a symbolic priority into the integer used as the job `priority`
# option.
#
# @param enqueue_pri [Symbol, nil] :low or :high; any other value (including
#   nil) selects the default
# @return [Integer] -5 for :low, -15 for :high, -10 otherwise
def normalize_enqueue_pri(enqueue_pri)
  case enqueue_pri
  when :low
    -5
  when :high
    -15
  else
    -10
  end
end
|
|
|
|
# Enqueues a Domain::Fa::Job::ScanPostJob for a bare fa_id, at most once per
# fa_id for the lifetime of this job instance. The set of already handled ids
# is shared with enqueue_post_scan.
#
# @param fa_id [Integer] id of the post to scan
# @param caused_by_entry [Object] forwarded to the enqueued job
# @param enqueue_pri [Symbol, nil] symbolic priority, converted with
#   normalize_enqueue_pri
# @return [Object, nil] result of defer_job, or nil when the fa_id was already
#   handled
def enqueue_fa_id_scan(fa_id, caused_by_entry, enqueue_pri)
  enqueue_pri = normalize_enqueue_pri(enqueue_pri)
  @posts_enqueued_for_scan ||= Set.new
  return unless @posts_enqueued_for_scan.add?(fa_id)

  logger.info "enqueue post scan for fa_id #{fa_id}"
  defer_job(
    Domain::Fa::Job::ScanPostJob,
    { fa_id: fa_id, caused_by_entry: caused_by_entry },
    { priority: enqueue_pri },
  )
end
|
|
|
|
# Enqueues the next scan a post needs, at most once per fa_id for the lifetime
# of this job instance (the set of handled ids is shared with
# enqueue_fa_id_scan):
#   - post not scanned yet        -> Domain::Fa::Job::ScanPostJob
#   - scanned but file is missing -> Domain::Fa::Job::ScanFileJob
#   - scanned and file present    -> nothing
#
# @param post [Object] post record; must answer fa_id, scanned? and have_file?
# @param caused_by_entry [Object] forwarded to the enqueued job
# @param enqueue_pri [Symbol, nil] symbolic priority, converted with
#   normalize_enqueue_pri
# @return [Object, nil] result of defer_job, or nil when nothing was enqueued
def enqueue_post_scan(post, caused_by_entry, enqueue_pri)
  enqueue_pri = normalize_enqueue_pri(enqueue_pri)
  @posts_enqueued_for_scan ||= Set.new
  return unless @posts_enqueued_for_scan.add?(post.fa_id)

  job_class, scan_label =
    if !post.scanned?
      [Domain::Fa::Job::ScanPostJob, "post scan"]
    elsif !post.have_file?
      [Domain::Fa::Job::ScanFileJob, "file scan"]
    end
  return unless job_class

  fa_id_str = (post.fa_id || "(nil)").to_s.bold
  logger.info "enqueue #{scan_label} for fa_id #{fa_id_str}"
  defer_job(
    job_class,
    { post: post, caused_by_entry: caused_by_entry },
    { priority: enqueue_pri },
  )
end
|
|
end
|