Files
redux-scraper/app/jobs/domain/fa/job/browse_page_job.rb
2024-12-17 17:57:17 +00:00

63 lines
1.6 KiB
Ruby

# Job that walks the furaffinity.net /browse listing one page at a time,
# handing each page to update_and_enqueue_posts_from_listings_page and
# keeping running totals of the posts it reports.
class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
  queue_as :fa_browse_page
  ignore_signature_args :caused_by_entry

  # Scans browse pages starting at page 1 and logs a summary when done.
  #
  # Paging stops as soon as a page reports no newly seen posts, or once the
  # page-number cap is exceeded.
  #
  # @param args [Hash] keyword arguments; only :caused_by_entry is read here
  def perform(**args)
    @caused_by_entry = args[:caused_by_entry]
    @first_browse_page_entry = nil
    @page_number = 1
    @total_num_new_posts_seen = 0
    @total_num_posts_seen = 0

    # NOTE(review): the cap is tested after a page has been scanned, so page
    # 151 is still fetched before the loop exits - confirm whether a hard
    # limit of 150 pages was intended.
    while scan_browse_page
      break if @page_number > 150
      @page_number += 1
    end

    # @page_number is not advanced past the last page scanned, so it doubles
    # as the count of pages visited.
    logger.info(
      "finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts across #{@page_number.to_s.bold} pages"
    )
  end

  private

  # Fetches and processes the browse page numbered @page_number.
  #
  # @return [Boolean] true when the page's stats report new_seen > 0, which
  #   the caller uses as the signal to continue to the next page
  def scan_browse_page
    # Page 1 is requested from the bare /browse/ URL; later pages carry the
    # page number in the path.
    url =
      if @page_number == 1
        "https://www.furaffinity.net/browse/"
      else
        "https://www.furaffinity.net/browse/#{@page_number}/"
      end

    # Requests after the first are attributed to the first browse page's log
    # entry; the first request is attributed to the entry passed to the job.
    response =
      http_client.get(
        url,
        caused_by_entry: @first_browse_page_entry || @caused_by_entry
      )
    entry = response.log_entry
    @first_browse_page_entry ||= entry

    # fatal_error is defined outside this file; presumably it raises and so
    # aborts the job - TODO confirm.
    unless response.status_code == 200
      fatal_error(
        "non 200 response for /browse: #{response.status_code.to_s.underline}"
      )
    end

    stats =
      update_and_enqueue_posts_from_listings_page(
        :browse_page,
        Domain::Fa::Parser::Page.new(response.body),
        entry,
        enqueue_posts_pri: :high,
        page_desc: "Browse@#{@page_number}",
        fill_id_gaps: true
      )

    @total_num_new_posts_seen += stats.new_seen
    @total_num_posts_seen += stats.total_seen
    stats.new_seen > 0
  end
end