Files
redux-scraper/app/jobs/domain/fa/job/user_gallery_job.rb
2025-01-01 03:29:53 +00:00

127 lines
3.7 KiB
Ruby

# typed: false
# Walks a user's gallery listing pages on furaffinity.net - the main gallery,
# scraps, and (on forced scans) any submission folders discovered on the way -
# handing every listing page to `update_and_enqueue_posts_from_listings_page`
# and finally stamping the user with `scanned_gallery_at`.
class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
  queue_as :fa_user_gallery

  # Page cap used when no recent submission count is available for the user.
  MAX_PAGE_NUMBER = 350

  # Listing pages are requested with `perpage=<this value>`; a page holding
  # fewer posts than this is treated as the last page of its folder.
  POSTS_PER_PAGE = 72

  # Job entry point.
  #
  # @param args forwarded unchanged to `init_from_args!`
  #   (presumably what populates @user, @force_scan and @caused_by_entry -
  #   confirm against the base class)
  # @return [void]
  def perform(args)
    init_from_args!(args)

    if @user.state != "ok" && @user.scanned_gallery_at
      logger.warn("state == #{@user.state} and already scanned, skipping")
      return
    end

    # buggy (sentinel) user
    return if @user.id == 117_552 && @user.url_name == "click here"

    # A user whose gallery has never been scanned is paged through to the end
    # of every folder instead of stopping at the first page with nothing new.
    @go_until_end = @user.scanned_gallery_at.nil?
    @first_gallery_page_entry = nil

    @max_page_number = MAX_PAGE_NUMBER
    if @user.num_submissions && @user.scanned_page_at &&
         @user.scanned_page_at > 3.days.ago
      # The submission count is recent, so derive the cap from it: the number
      # of pages needed to list that many posts, plus a little slack.
      # NOTE(review): assumes no single folder holds more posts than
      # `num_submissions` - confirm that the count includes scraps.
      @max_page_number =
        @user.num_submissions.fdiv(POSTS_PER_PAGE).ceil + 3
    end

    if !@user.due_for_gallery_scan? && !@force_scan
      logger.warn(
        "gallery scanned #{time_ago_in_words(@user.scanned_gallery_at)}, skipping",
      )
      return
    end

    @visited = Set.new
    @folders =
      Set.new(
        [
          { href: "/gallery/#{@user.url_name}/", title: "Main Gallery" },
          { href: "/scraps/#{@user.url_name}/", title: "Scraps" },
        ],
      )

    # `scan_folder` may add folders to @folders while it runs, so the pending
    # set is recomputed on every pass rather than iterated directly.
    until (pending = @folders - @visited).empty?
      folder = pending.first
      @visited.add(folder)
      break if scan_folder(folder) == :break
    end

    @user.log_entry_detail[
      "last_gallery_page_id"
    ] = @first_gallery_page_entry&.id
    @user.scanned_gallery_at = Time.now
    @user.save!
  end

  private

  # Fetches the listing pages of one folder, starting at page 1, until a stop
  # condition is met.
  #
  # @param folder [Hash] `{ href:, title: }` describing the folder
  # @return [Symbol, Object] `:break` when the account turned out to be
  #   disabled or missing (the caller then stops scanning further folders);
  #   any other value means the folder finished normally
  def scan_folder(folder)
    page_number = 1
    total_num_new_posts_seen = 0
    total_num_posts_seen = 0
    # Declared outside the loop block so the `ensure` clause below can see
    # the most recent response.
    response = nil

    # Normalise the href once so it always has a leading and trailing slash.
    folder_href = folder[:href]
    folder_href += "/" unless folder_href.end_with?("/")
    folder_href = "/" + folder_href unless folder_href.start_with?("/")

    loop do
      if page_number >= @max_page_number
        fatal_error(
          "hit max (#{page_number}) pages, num submissions: #{@user.num_submissions}",
        )
      end

      page_url =
        "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=#{POSTS_PER_PAGE}"
      response = http_client.get(page_url, caused_by_entry: @caused_by_entry)

      # The first page fetched by this job becomes the "caused by" entry for
      # every later request.
      @first_gallery_page_entry ||= response.log_entry
      @caused_by_entry = @first_gallery_page_entry

      if response.status_code != 200
        fatal_error(
          "http #{response.status_code}, log entry #{response.log_entry.id}",
        )
      end

      if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
           @user,
           response,
         )
        logger.error("account disabled / not found, abort")
        return :break
      end

      page = Domain::Fa::Parser::Page.new(response.body)

      # newly instantiated users don't have a name yet, but can derive it from the gallery page
      @user.name ||= page.user_page.name || @user.url_name

      listing_page_stats =
        update_and_enqueue_posts_from_listings_page(
          :gallery_page,
          page,
          response.log_entry,
          enqueue_posts_pri: :low,
          enqueue_gallery_scan: false,
          enqueue_page_scan: false,
          page_desc: "#{folder[:title]}@#{page_number}",
        )
      total_num_new_posts_seen += listing_page_stats.new_seen
      total_num_posts_seen += listing_page_stats.total_seen

      # Extra submission folders are only followed on forced scans.
      page.submission_folders.each { |sf| @folders.add(sf) } if @force_scan

      page_number += 1

      # Nothing new on this page: stop, unless this is a full first-time scan.
      break if listing_page_stats.new_seen == 0 && !@go_until_end
      # A short page is the last page of the folder.
      break if listing_page_stats.total_seen < POSTS_PER_PAGE
    end

    logger.info(
      "folder `#{folder[:title].bold}` complete - #{total_num_new_posts_seen.to_s.bold} new, #{total_num_posts_seen.to_s.bold} total",
    )
  ensure
    # Runs on every exit path; only the last successfully fetched page is
    # used to enqueue jobs from the links it contains.
    if response && response.status_code == 200
      enqueue_jobs_from_found_links(
        response.log_entry,
        suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
      )
    end
  end
end