# Listing metadata from the hosting page (as originally published): 168 lines, 5.2 KiB, Ruby.
# Base class for the Domain::Fa scraper jobs.
#
# Provides a memoized FA HTTP client plus helpers that turn a parsed listings
# page into Domain::Fa::Post / Domain::Fa::User records and follow-up jobs.
#
# `fatal_error`, `enqueue_job`, `logger` and `time_ago_in_words` are called
# here but not defined here; presumably they come from Scraper::JobBase --
# TODO confirm.
class Domain::Fa::Job::FaJobBase < Scraper::JobBase
  discard_on ActiveJob::DeserializationError

  # Returns the Scraper::FaHttpClient used by these jobs, building it on the
  # first call and reusing it afterwards.
  #
  # A class variable is kept on purpose: it is visible to every subclass, so
  # all of them receive the same client object. Switching to a class-level
  # instance variable would give each subclass its own client.
  def self.build_http_client
    @@fa_base_http_client ||= Scraper::FaHttpClient.new
  end

  protected

  # Per-job-instance memoized accessor for the client from
  # `.build_http_client`.
  def http_client
    @http_client ||= self.class.build_http_client
  end

  # Resolves the user a job should operate on.
  #
  # @param args [Hash] job arguments; reads `:user` and `:url_name`
  # @param caused_by_entry [#id, nil] when given, its id is stored under
  #   `state_detail["first_seen_entry"]` on a user created by this call
  # @return `args[:user]` when present, otherwise the Domain::Fa::User found
  #   or created for the down-cased `args[:url_name]`
  #
  # Calls `fatal_error` when neither a user nor a non-blank url_name is given.
  def find_or_initialize_user_from_args(args, caused_by_entry: nil)
    given_user = args[:user]
    return given_user if given_user

    url_name = args[:url_name]&.downcase
    fatal_error("arg 'url_name' is required if arg 'user' is nil") if url_name.blank?

    Domain::Fa::User.find_or_create_by(url_name: url_name) do |user|
      user.state_detail ||= {}
      user.state_detail["first_seen_entry"] = caused_by_entry.id if caused_by_entry
    end
  end

  # The method was originally published under this misspelled name; keep it
  # working for existing callers.
  alias_method :find_or_intitialize_user_from_args, :find_or_initialize_user_from_args

  # Counters returned by #update_and_enqueue_posts_from_listings_page:
  #   new_seen     - submissions whose post was a new record
  #   total_seen   - submissions processed
  #   last_was_new - whether the last processed submission's post was new
  ListingsPageScanStats = Struct.new(
    :new_seen,
    :total_seen,
    :last_was_new,
  )

  # Walks every parsed submission of a listings page: finds or initializes the
  # matching post, updates and saves it, enqueues user scans for its creator
  # and, for posts in state `ok`, enqueues a post/file scan.
  #
  # @param job_type [Symbol] forwarded to #update_post_from_listings_page
  #   (`:browse_page` or `:gallery_page`)
  # @param page parsed page; must answer `probably_listings_page?` and
  #   `submissions_parsed`
  # @param caused_by_entry log entry recorded on posts and forwarded to the
  #   enqueued jobs
  # @param enqueue_posts_pri [Symbol] priority key, see #enqueue_post_scan
  # @param enqueue_page_scan [Boolean] forwarded to #enqueue_user_scan
  # @param enqueue_gallery_scan [Boolean] forwarded to #enqueue_user_scan
  # @param page_desc optional label included (bold) in the summary log line
  # @return [ListingsPageScanStats]
  #
  # Calls `fatal_error` when the page is not a listings page.
  def update_and_enqueue_posts_from_listings_page(
    job_type,
    page,
    caused_by_entry,
    enqueue_posts_pri:,
    enqueue_page_scan: true,
    enqueue_gallery_scan: true,
    page_desc: nil
  )
    fatal_error("not a listings page") unless page.probably_listings_page?
    submissions = page.submissions_parsed

    page_desc = page_desc ? "page #{page_desc.to_s.bold}" : "page"

    listing_page_stats = ListingsPageScanStats.new(0, 0, false)
    submissions.each do |submission|
      post = Domain::Fa::Post.find_or_initialize_by(fa_id: submission.id)

      # Must be sampled before the post is saved below.
      is_new = post.new_record?
      listing_page_stats.last_was_new = is_new
      listing_page_stats.new_seen += 1 if is_new
      listing_page_stats.total_seen += 1

      update_post_from_listings_page(job_type, post, submission, caused_by_entry)

      if post.creator
        enqueue_user_scan(
          post.creator,
          caused_by_entry,
          enqueue_page_scan: enqueue_page_scan,
          enqueue_gallery_scan: enqueue_gallery_scan,
        )
      end

      # `&.` keeps a post without a state from raising NoMethodError; it is
      # reported through the "unknown post state" branch instead.
      case post.state&.to_sym
      when :ok
        enqueue_post_scan(post, caused_by_entry, enqueue_posts_pri)
      when :removed
        logger.info "(todo) removed post seen in listing page, enqueue scan for fa_id #{post.fa_id}"
      when :scan_error
        logger.info "(todo) scan_error'd post seen in listing page for fa_id #{post.fa_id}"
      when :file_error
        logger.info "(todo) file_error'd post seen in listing page for fa_id #{post.fa_id}"
      else
        logger.info "unknown post state `#{post.state}` for fa_id #{post.fa_id}"
      end
    end

    logger.info "#{page_desc} has #{submissions.count.to_s.bold} posts, " \
                "#{listing_page_stats.new_seen.to_s.bold} new"

    listing_page_stats
  end

  # Copies listing data onto `post` and saves it with `save!`.
  #
  # Records `caused_by_entry.id` under `log_entry_detail` as
  # "first_browse_page_id" or "first_gallery_page_id" (only if not already
  # set), fills in the creator if missing, and overwrites title and
  # thumbnail_uri from the submission.
  #
  # Calls `fatal_error` for any other job_type, or when the submission's
  # title or thumb_path is nil/false.
  def update_post_from_listings_page(job_type, post, submission, caused_by_entry)
    case job_type
    when :browse_page
      post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
    when :gallery_page
      post.log_entry_detail["first_gallery_page_id"] ||= caused_by_entry.id
    else
      fatal_error("unhandled job_type: #{job_type}")
    end

    post.creator ||= Domain::Fa::User.find_or_build_from_submission_parser(submission)
    post.title = submission.title || fatal_error("blank title")
    post.thumbnail_uri = submission.thumb_path || fatal_error("blank thumb_path")
    post.save!
  end

  # Enqueues UserPageJob and/or UserGalleryJob for `user` when the
  # corresponding scan is enabled, due, and the user's url_name has not
  # already been considered for that scan type by this job instance.
  #
  # Persisted users are passed to the jobs as `user:`; unpersisted ones as
  # `url_name:`. An unpersisted user without a url_name is skipped with a
  # warning.
  def enqueue_user_scan(
    user,
    caused_by_entry,
    enqueue_page_scan: true,
    enqueue_gallery_scan: true
  )
    @users_enqueued_for_page_scan ||= Set.new
    @users_enqueued_for_gallery_scan ||= Set.new

    args =
      if user.persisted?
        { user: user }
      elsif user.url_name
        { url_name: user.url_name }
      else
        logger.warn "user does not have a url name and is not persisted, skipping (#{user.name})"
        return
      end
    args = args.merge({ caused_by_entry: caused_by_entry })

    # Set#add? returns nil when the url_name was already present, so each user
    # is evaluated at most once per scan type, whether or not a job is enqueued.
    if enqueue_page_scan && @users_enqueued_for_page_scan.add?(user.url_name)
      if user.due_for_page_scan?
        logger.info(
          "enqueue user page job for #{user.url_name.bold}, " \
          "last scanned #{time_ago_in_words(user.scanned_page_at)}"
        )
        enqueue_job(Domain::Fa::Job::UserPageJob, args)
      end
    end

    if enqueue_gallery_scan && @users_enqueued_for_gallery_scan.add?(user.url_name)
      if user.due_for_gallery_scan?
        logger.info(
          "enqueue user gallery job for #{user.url_name.bold}, " \
          "last scanned #{time_ago_in_words(user.scanned_gallery_at)}"
        )
        enqueue_job(Domain::Fa::Job::UserGalleryJob, args)
      end
    end
  end

  # Enqueues a ScanPostJob for a post that has not been scanned, or a
  # ScanFileJob for a scanned post that has no file yet. Each fa_id is
  # considered at most once per job instance.
  #
  # @param enqueue_pri [Symbol] `:low` maps to -5, `:high` to -15, anything
  #   else to -10. NOTE(review): presumably a lower number is picked up
  #   sooner by the queue backend -- confirm.
  def enqueue_post_scan(post, caused_by_entry, enqueue_pri)
    priority =
      case enqueue_pri
      when :low then -5
      when :high then -15
      else -10
      end

    @posts_enqueued_for_scan ||= Set.new
    return unless @posts_enqueued_for_scan.add?(post.fa_id)

    fa_id_str = (post.fa_id || "(nil)").to_s.bold
    job_args = { post: post, caused_by_entry: caused_by_entry }

    if !post.scanned?
      logger.info "enqueue post scan for fa_id #{fa_id_str}"
      enqueue_job(Domain::Fa::Job::ScanPostJob, job_args, { priority: priority })
    elsif !post.have_file?
      logger.info "enqueue file scan for fa_id #{fa_id_str}"
      enqueue_job(Domain::Fa::Job::ScanFileJob, job_args, { priority: priority })
    end
  end
end