Files
redux-scraper/app/jobs/domain/fa/job/fa_job_base.rb

168 lines
5.2 KiB
Ruby

# Base class for jobs in the Domain::Fa::Job namespace. It provides a shared
# HTTP client plus helpers that turn a parsed listings page into
# Domain::Fa::Post / Domain::Fa::User records and follow-up scan jobs.
#
# NOTE(review): +fatal_error+, +logger+, +enqueue_job+ and
# +time_ago_in_words+ are not defined in this class; presumably they are
# inherited from Scraper::JobBase - confirm there. Likewise String#bold is
# assumed to be provided by an extension loaded elsewhere.
class Domain::Fa::Job::FaJobBase < Scraper::JobBase
  # Counters returned by #update_and_enqueue_posts_from_listings_page:
  #   new_seen     - how many posts on the page were new records
  #   total_seen   - how many submissions were iterated
  #   last_was_new - whether the last submission iterated was a new record
  ListingsPageScanStats = Struct.new(:new_seen, :total_seen, :last_was_new)

  # Discard the job when its arguments can no longer be deserialized.
  discard_on ActiveJob::DeserializationError

  # Returns the memoized Scraper::FaHttpClient.
  #
  # The client lives in a class variable, so this class and every subclass
  # share the same instance.
  def self.build_http_client
    @@fa_base_http_client ||= Scraper::FaHttpClient.new
  end

  protected

  # Per-job memoized accessor for the client built by .build_http_client.
  def http_client
    return @http_client if @http_client

    @http_client = self.class.build_http_client
  end

  # Resolves the user a job should operate on.
  #
  # @param args [Hash] job arguments; reads +:user+ and +:url_name+
  # @param caused_by_entry [#id, nil] entry whose id is recorded as
  #   "first_seen_entry" on a user created by this call
  # @return +args[:user]+ when truthy, otherwise the Domain::Fa::User found
  #   or created for the downcased +args[:url_name]+
  #
  # Calls +fatal_error+ when neither a user nor a non-blank url_name is given.
  def find_or_intitialize_user_from_args(args, caused_by_entry: nil)
    given_user = args[:user]
    return given_user if given_user

    normalized_name = args[:url_name]&.downcase
    if normalized_name.blank?
      fatal_error("arg 'url_name' is required if arg 'user' is nil")
    end

    # find_or_create_by yields only when it has to create the record.
    Domain::Fa::User.find_or_create_by(url_name: normalized_name) do |new_user|
      new_user.state_detail ||= {}
      if caused_by_entry
        new_user.state_detail["first_seen_entry"] = caused_by_entry.id
      end
    end
  end

  # Walks every parsed submission on a listings page, upserts its post,
  # optionally enqueues scans for the post's creator, and enqueues a post
  # scan for posts in the :ok state (other states are only logged).
  #
  # @param job_type [Symbol] :browse_page or :gallery_page (see
  #   #update_post_from_listings_page)
  # @param page parsed page; must answer +probably_listings_page?+ and
  #   +submissions_parsed+
  # @param caused_by_entry entry forwarded to the records and jobs created
  # @param enqueue_posts_pri [Symbol] priority hint passed to #enqueue_post_scan
  # @param enqueue_page_scan [Boolean] forwarded to #enqueue_user_scan
  # @param enqueue_gallery_scan [Boolean] forwarded to #enqueue_user_scan
  # @param page_desc optional label included in the summary log line
  # @return [ListingsPageScanStats]
  def update_and_enqueue_posts_from_listings_page(
    job_type,
    page,
    caused_by_entry,
    enqueue_posts_pri:,
    enqueue_page_scan: true,
    enqueue_gallery_scan: true,
    page_desc: nil
  )
    fatal_error("not a listings page") unless page.probably_listings_page?

    submissions = page.submissions_parsed
    page_label = page_desc ? "page #{page_desc.to_s.bold}" : "page"
    stats = ListingsPageScanStats.new(0, 0, false)

    submissions.each do |listing|
      post = Domain::Fa::Post.find_or_initialize_by(fa_id: listing.id)

      # Record newness before the post is saved below.
      is_new = post.new_record?
      stats.last_was_new = is_new
      stats.new_seen += 1 if is_new
      stats.total_seen += 1

      update_post_from_listings_page(job_type, post, listing, caused_by_entry)

      if post.creator
        enqueue_user_scan(
          post.creator,
          caused_by_entry,
          enqueue_page_scan: enqueue_page_scan,
          enqueue_gallery_scan: enqueue_gallery_scan,
        )
      end

      state = post.state.to_sym
      if state == :ok
        enqueue_post_scan(post, caused_by_entry, enqueue_posts_pri)
      elsif state == :removed
        logger.info "(todo) removed post seen in listing page, enqueue scan for fa_id #{post.fa_id}"
      elsif state == :scan_error
        logger.info "(todo) scan_error'd post seen in listing page for fa_id #{post.fa_id}"
      elsif state == :file_error
        logger.info "(todo) file_error'd post seen in listing page for fa_id #{post.fa_id}"
      else
        logger.info "unknown post state `#{post.state}` for fa_id #{post.fa_id}"
      end
    end

    logger.info "#{page_label} has #{submissions.count.to_s.bold} posts, #{stats.new_seen.to_s.bold} new"
    stats
  end

  # Copies listing data onto +post+ and saves it.
  #
  # Records the id of the first browse/gallery page entry that surfaced the
  # post (an already-set value is kept), fills in the creator if missing, and
  # overwrites title and thumbnail_uri. Calls +fatal_error+ for an unknown
  # +job_type+ or a missing title / thumb_path.
  #
  # @return the result of +post.save!+
  def update_post_from_listings_page(job_type, post, submission, caused_by_entry)
    case job_type
    when :browse_page
      post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
    when :gallery_page
      post.log_entry_detail["first_gallery_page_id"] ||= caused_by_entry.id
    else
      fatal_error("unhandled job_type: #{job_type}")
    end

    post.creator ||= Domain::Fa::User.find_or_build_from_submission_parser(submission)

    listing_title = submission.title || fatal_error("blank title")
    post.title = listing_title

    listing_thumb = submission.thumb_path || fatal_error("blank thumb_path")
    post.thumbnail_uri = listing_thumb

    post.save!
  end

  # Enqueues the user page and/or user gallery job for +user+, at most once
  # per url_name per job instance for each kind, and only when the user
  # reports being due for that scan.
  #
  # A persisted user is passed to the job as +user:+, an unpersisted one as
  # +url_name:+. An unpersisted user without a url_name is logged and skipped.
  def enqueue_user_scan(
    user,
    caused_by_entry,
    enqueue_page_scan: true,
    enqueue_gallery_scan: true
  )
    @users_enqueued_for_page_scan ||= Set.new
    @users_enqueued_for_gallery_scan ||= Set.new

    persisted = user.persisted?
    unless persisted || user.url_name
      logger.warn "user does not have a url name and is not persisted, skipping (#{user.name})"
      return
    end

    job_args =
      if persisted
        { user: user, caused_by_entry: caused_by_entry }
      else
        { url_name: user.url_name, caused_by_entry: caused_by_entry }
      end

    # Set#add? returns nil when the name was already present, so the name is
    # remembered even if the user turns out not to be due.
    if enqueue_page_scan &&
         @users_enqueued_for_page_scan.add?(user.url_name) &&
         user.due_for_page_scan?
      logger.info(
        "enqueue user page job for #{user.url_name.bold}, last scanned #{time_ago_in_words(user.scanned_page_at)}"
      )
      enqueue_job(Domain::Fa::Job::UserPageJob, job_args)
    end

    if enqueue_gallery_scan &&
         @users_enqueued_for_gallery_scan.add?(user.url_name) &&
         user.due_for_gallery_scan?
      logger.info(
        "enqueue user gallery job for #{user.url_name.bold}, last scanned #{time_ago_in_words(user.scanned_gallery_at)}"
      )
      enqueue_job(Domain::Fa::Job::UserGalleryJob, job_args)
    end
  end

  # Enqueues a ScanPostJob for an unscanned post, or a ScanFileJob for a
  # scanned post that has no file, at most once per fa_id per job instance.
  #
  # @param enqueue_pri [Symbol] :low maps to priority -5, :high to -15, and
  #   anything else to -10
  def enqueue_post_scan(post, caused_by_entry, enqueue_pri)
    priority = { low: -5, high: -15 }.fetch(enqueue_pri, -10)

    @posts_enqueued_for_scan ||= Set.new
    return unless @posts_enqueued_for_scan.add?(post.fa_id)

    fa_id_str = (post.fa_id || "(nil)").to_s.bold
    job_args = { post: post, caused_by_entry: caused_by_entry }
    job_opts = { priority: priority }

    if post.scanned?
      return if post.have_file?

      logger.info "enqueue file scan for fa_id #{fa_id_str}"
      enqueue_job(Domain::Fa::Job::ScanFileJob, job_args, job_opts)
    else
      logger.info "enqueue post scan for fa_id #{fa_id_str}"
      enqueue_job(Domain::Fa::Job::ScanPostJob, job_args, job_opts)
    end
  end
end