Files
redux-scraper/app/jobs/domain/fa/job/user_gallery_job.rb
2025-02-25 05:47:44 +00:00

160 lines
4.4 KiB
Ruby

# typed: strict
# Job that walks the listing pages of a FurAffinity user's "Main Gallery" and
# "Scraps" folders (plus any submission folders discovered along the way when
# the `scan_folders` job argument is truthy) and hands every parsed page to
# `update_and_enqueue_posts_from_listings_page`.
#
# Helpers such as `user_from_args!`, `force_scan?`, `fatal_error`,
# `http_client`, `first_log_entry`, `make_tag` and `format_tags` are not
# defined in this class; presumably they come from Domain::Fa::Job::Base.
class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
  queue_as :fa_user_gallery
  queue_with_priority do
    T.bind(self, Domain::Fa::Job::UserGalleryJob)
    # Users whose gallery has never been scanned get the numerically lower
    # value. NOTE(review): whether a lower number runs sooner depends on the
    # queue backend - confirm.
    user_from_args!.scanned_gallery_at.nil? ? -13 : -3
  end

  # Upper bound on the page number within one folder; reaching it is treated
  # as a fatal error.
  MAX_PAGE_NUMBER = 350

  # A page with fewer listings than this ends the scan of the current folder.
  LISTINGS_PER_PAGE_THRESHOLD = 72

  # A gallery folder to scan, identified by its site-relative href and title.
  # NOTE(review): ActsAsComparable is presumably included so that equal
  # folders compare equal inside the @folders / @visited sets - confirm.
  class Folder < T::ImmutableStruct
    include T::Struct::ActsAsComparable
    const :href, String
    const :title, String
  end

  # Forwards all arguments to the superclass and sets up per-run scan state.
  sig { params(args: T.untyped).void }
  def initialize(*args)
    super(*T.unsafe(args))
    # When true, a page without new posts does not stop the folder scan.
    @go_until_end = T.let(false, T::Boolean)
    @max_page_number = T.let(MAX_PAGE_NUMBER, Integer)
    # Folders already handed to scan_folder.
    @visited = T.let(Set.new, T::Set[Folder])
    # Every folder known so far (seeded in perform, extended by scan_folder).
    @folders = T.let(Set.new, T::Set[Folder])
  end

  # Scans every known folder of the user resolved by `user_from_args!`.
  #
  # Returns early (without scanning) when the user is not in state "ok" and
  # was already scanned, when the user is the known sentinel record, or when
  # the gallery scan is not due and `force_scan?` is false.
  #
  # The `ensure` clause saves the user on every exit path, including the
  # early returns and exceptions raised while scanning.
  #
  # @param args [Hash{Symbol => Object}] job arguments; not read directly
  #   here (the user comes from `user_from_args!`).
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    user = user_from_args!
    if user.state != "ok" && user.scanned_gallery_at
      logger.warn("state == #{user.state} and already scanned, skipping")
      return
    end

    # buggy (sentinel) user
    return if user.id == 117_552 && user.url_name == "click here"

    # A user that has never had a gallery scan gets a full walk of each folder.
    @go_until_end = user.gallery_scan.at.nil?
    if !user.gallery_scan.due? && !force_scan?
      logger.warn("gallery scanned #{user.gallery_scan.ago_in_words}, skipping")
      return
    end

    # Reset per-run state and seed it with the two folders every user has.
    @visited = Set.new
    @folders =
      Set.new(
        [
          Folder.new(href: "/gallery/#{user.url_name}/", title: "Main Gallery"),
          Folder.new(href: "/scraps/#{user.url_name}/", title: "Scraps"),
        ],
      )

    # scan_folder may add to @folders, so the pending set is recomputed on
    # every iteration (once, rather than once for the test and once for the
    # pick).
    until (pending = @folders - @visited).empty?
      folder = pending.first
      @visited.add folder
      should_break = T.let(false, T::Boolean)
      logger.tagged(make_tag("folder.title", folder.title)) do
        should_break = scan_folder(user, folder) == :break
      end
      break if should_break
    end

    user.last_gallery_page_id = first_log_entry&.id
    user.scanned_gallery_at = Time.current
  ensure
    user.save! if user
  end

  private

  # Fetches and processes the listing pages of one folder, starting at page 1.
  #
  # Stops when a page yields no new posts (unless @go_until_end is set) or
  # when a page holds fewer than LISTINGS_PER_PAGE_THRESHOLD listings. When
  # `scan_folders?` is true, submission folders found on each page are added
  # to @folders so that `perform` picks them up later.
  #
  # Calls `fatal_error` when the page limit is reached or a page responds
  # with a status other than 200.
  #
  # @param user [Domain::User::FaUser] owner of the gallery; `name` and
  #   `state` may be assigned here, persistence is left to the caller.
  # @param folder [Folder] the folder to walk.
  # @return [Symbol, nil] :break when the account is disabled / not found
  #   (the caller stops scanning), :continue otherwise.
  sig do
    params(user: Domain::User::FaUser, folder: Folder).returns(
      T.nilable(Symbol),
    )
  end
  def scan_folder(user, folder)
    page_number = 1
    total_num_new_posts_seen = 0
    total_num_posts_seen = 0

    # Normalise the href to "/.../" once; it does not change between pages.
    folder_href = folder.href
    folder_href += "/" unless folder_href.end_with?("/")
    folder_href = "/" + folder_href unless folder_href.start_with?("/")

    while true
      if page_number >= @max_page_number
        fatal_error(
          "hit max (#{page_number}) pages, num submissions: #{user.num_submissions}",
        )
      end

      # perpage=72 has the same value as LISTINGS_PER_PAGE_THRESHOLD, which
      # the end-of-folder check below relies on.
      page_url =
        "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
      response = http_client.get(page_url)
      fatal_error("failed to scan folder page") if response.status_code != 200

      if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
           user,
           response,
         )
        logger.error("account disabled / not found, abort")
        user.state = "account_disabled"
        return :break
      end

      page = Domain::Fa::Parser::Page.new(response.body)
      # newly instantiated users don't have a name yet, but can derive it from the gallery page
      user.name ||= page.user_page.name || user.url_name

      listing_page_stats =
        update_and_enqueue_posts_from_listings_page(
          ListingPageType::GalleryPage.new(page_number:, folder: folder.title),
          page_parser: page,
          for_user: user,
        )
      total_num_new_posts_seen += listing_page_stats.new_posts.count
      total_num_posts_seen += listing_page_stats.all_posts.count

      logger.info(
        format_tags(
          make_tag("page_number", page_number),
          make_tag("new on page", listing_page_stats.new_posts.count),
          make_tag("total on page", listing_page_stats.all_posts.count),
        ),
      )

      if scan_folders?
        page.submission_folders.each do |sf|
          @folders.add?(
            Folder.new(href: T.must(sf[:href]), title: T.must(sf[:title])),
          )
        end
      end

      page_number += 1
      # Nothing new on this page: stop, unless this is a full scan.
      break if listing_page_stats.new_posts.empty? && !@go_until_end
      # A page below the listing threshold ends the folder.
      break if listing_page_stats.all_posts.count < LISTINGS_PER_PAGE_THRESHOLD
    end

    logger.info(
      format_tags(
        "complete",
        make_tag("num_new", total_num_new_posts_seen),
        make_tag("num_total", total_num_posts_seen),
      ),
    )
    :continue
  end

  # Whether the `scan_folders` key of the first job argument is truthy.
  sig { returns(T::Boolean) }
  def scan_folders?
    !!arguments[0][:scan_folders]
  end
end